1/* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2025 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 3, or (at your option)
9any later version.
10
11GCC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#define INCLUDE_STRING
21#define IN_TARGET_CODE 1
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "backend.h"
27#include "rtl.h"
28#include "tree.h"
29#include "memmodel.h"
30#include "gimple.h"
31#include "cfghooks.h"
32#include "cfgloop.h"
33#include "df.h"
34#include "tm_p.h"
35#include "stringpool.h"
36#include "expmed.h"
37#include "optabs.h"
38#include "regs.h"
39#include "emit-rtl.h"
40#include "recog.h"
41#include "cgraph.h"
42#include "diagnostic.h"
43#include "cfgbuild.h"
44#include "alias.h"
45#include "fold-const.h"
46#include "attribs.h"
47#include "calls.h"
48#include "stor-layout.h"
49#include "varasm.h"
50#include "output.h"
51#include "insn-attr.h"
52#include "flags.h"
53#include "except.h"
54#include "explow.h"
55#include "expr.h"
56#include "cfgrtl.h"
57#include "common/common-target.h"
58#include "langhooks.h"
59#include "reload.h"
60#include "gimplify.h"
61#include "dwarf2.h"
62#include "tm-constrs.h"
63#include "cselib.h"
64#include "sched-int.h"
65#include "opts.h"
66#include "tree-pass.h"
67#include "context.h"
68#include "pass_manager.h"
69#include "target-globals.h"
70#include "gimple-iterator.h"
71#include "gimple-fold.h"
72#include "tree-vectorizer.h"
73#include "shrink-wrap.h"
74#include "builtins.h"
75#include "rtl-iter.h"
76#include "tree-iterator.h"
77#include "dbgcnt.h"
78#include "case-cfn-macros.h"
79#include "dojump.h"
80#include "fold-const-call.h"
81#include "tree-vrp.h"
82#include "tree-ssanames.h"
83#include "selftest.h"
84#include "selftest-rtl.h"
85#include "print-rtl.h"
86#include "intl.h"
87#include "ifcvt.h"
88#include "symbol-summary.h"
89#include "sreal.h"
90#include "ipa-cp.h"
91#include "ipa-prop.h"
92#include "ipa-fnsummary.h"
93#include "wide-int-bitmask.h"
94#include "tree-vector-builder.h"
95#include "debug.h"
96#include "dwarf2out.h"
97#include "i386-options.h"
98#include "i386-builtins.h"
99#include "i386-expand.h"
100#include "i386-features.h"
101#include "function-abi.h"
102#include "rtl-error.h"
103#include "gimple-pretty-print.h"
104
105/* This file should be included last. */
106#include "target-def.h"
107
108static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
109static void ix86_emit_restore_reg_using_pop (rtx, bool = false);
110
111
112#ifndef CHECK_STACK_LIMIT
113#define CHECK_STACK_LIMIT (-1)
114#endif
115
116/* Return index of given mode in mult and division cost tables. */
117#define MODE_INDEX(mode) \
118 ((mode) == QImode ? 0 \
119 : (mode) == HImode ? 1 \
120 : (mode) == SImode ? 2 \
121 : (mode) == DImode ? 3 \
122 : 4)
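/* For example, MODE_INDEX (SImode) is 2, and any mode other than QImode,
HImode, SImode or DImode falls through to the last slot, 4. */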
123
124
125/* Set by -mtune. */
126const struct processor_costs *ix86_tune_cost = NULL;
127
128/* Set by -mtune or -Os. */
129const struct processor_costs *ix86_cost = NULL;
130
131/* In case the average insn count for single function invocation is
132 lower than this constant, emit fast (but longer) prologue and
133 epilogue code. */
134#define FAST_PROLOGUE_INSN_COUNT 20
135
136/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
137static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
138static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
139static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
140
141/* Array of the smallest class containing reg number REGNO, indexed by
142 REGNO. Used by REGNO_REG_CLASS in i386.h. */
143
144enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
145{
146 /* ax, dx, cx, bx */
147 AREG, DREG, CREG, BREG,
148 /* si, di, bp, sp */
149 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
150 /* FP registers */
151 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
152 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
153 /* arg pointer, flags, fpsr, frame */
154 NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
155 /* SSE registers */
156 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
157 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
158 /* MMX registers */
159 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
160 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
161 /* REX registers */
162 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
163 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
164 /* SSE REX registers */
165 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
166 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
167 /* AVX-512 SSE registers */
168 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
169 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
170 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
171 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
172 /* Mask registers. */
173 ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
174 MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
175 /* REX2 registers */
176 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
177 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
178 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
179 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
180};
181
182/* The "default" register map used in 32bit mode. */
183
184unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
185{
186 /* general regs */
187 0, 2, 1, 3, 6, 7, 4, 5,
188 /* fp regs */
189 12, 13, 14, 15, 16, 17, 18, 19,
190 /* arg, flags, fpsr, frame */
191 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
192 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
193 /* SSE */
194 21, 22, 23, 24, 25, 26, 27, 28,
195 /* MMX */
196 29, 30, 31, 32, 33, 34, 35, 36,
197 /* extended integer registers */
198 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
199 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
200 /* extended sse registers */
201 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
202 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
203 /* AVX-512 registers 16-23 */
204 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
205 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
206 /* AVX-512 registers 24-31 */
207 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
208 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
209 /* Mask registers */
210 93, 94, 95, 96, 97, 98, 99, 100
211};
212
213/* The "default" register map used in 64bit mode. */
214
215unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
216{
217 /* general regs */
218 0, 1, 2, 3, 4, 5, 6, 7,
219 /* fp regs */
220 33, 34, 35, 36, 37, 38, 39, 40,
221 /* arg, flags, fpsr, frame */
222 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
223 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
224 /* SSE */
225 17, 18, 19, 20, 21, 22, 23, 24,
226 /* MMX */
227 41, 42, 43, 44, 45, 46, 47, 48,
228 /* extended integer registers */
229 8, 9, 10, 11, 12, 13, 14, 15,
230 /* extended SSE registers */
231 25, 26, 27, 28, 29, 30, 31, 32,
232 /* AVX-512 registers 16-23 */
233 67, 68, 69, 70, 71, 72, 73, 74,
234 /* AVX-512 registers 24-31 */
235 75, 76, 77, 78, 79, 80, 81, 82,
236 /* Mask registers */
237 118, 119, 120, 121, 122, 123, 124, 125,
/* REX2 extended integer registers */
239 130, 131, 132, 133, 134, 135, 136, 137,
240 138, 139, 140, 141, 142, 143, 144, 145
241};
242
243/* Define the register numbers to be used in Dwarf debugging information.
244 The SVR4 reference port C compiler uses the following register numbers
245 in its Dwarf output code:
246 0 for %eax (gcc regno = 0)
247 1 for %ecx (gcc regno = 2)
248 2 for %edx (gcc regno = 1)
249 3 for %ebx (gcc regno = 3)
250 4 for %esp (gcc regno = 7)
251 5 for %ebp (gcc regno = 6)
252 6 for %esi (gcc regno = 4)
253 7 for %edi (gcc regno = 5)
254 The following three DWARF register numbers are never generated by
255 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
256 believed these numbers have these meanings.
257 8 for %eip (no gcc equivalent)
258 9 for %eflags (gcc regno = 17)
259 10 for %trapno (no gcc equivalent)
260 It is not at all clear how we should number the FP stack registers
261 for the x86 architecture. If the version of SDB on x86/svr4 were
262 a bit less brain dead with respect to floating-point then we would
263 have a precedent to follow with respect to DWARF register numbers
264 for x86 FP registers, but the SDB on x86/svr4 was so completely
265 broken with respect to FP registers that it is hardly worth thinking
266 of it as something to strive for compatibility with.
267 The version of x86/svr4 SDB I had does (partially)
268 seem to believe that DWARF register number 11 is associated with
269 the x86 register %st(0), but that's about all. Higher DWARF
270 register numbers don't seem to be associated with anything in
271 particular, and even for DWARF regno 11, SDB only seemed to under-
272 stand that it should say that a variable lives in %st(0) (when
273 asked via an `=' command) if we said it was in DWARF regno 11,
274 but SDB still printed garbage when asked for the value of the
275 variable in question (via a `/' command).
276 (Also note that the labels SDB printed for various FP stack regs
277 when doing an `x' command were all wrong.)
278 Note that these problems generally don't affect the native SVR4
279 C compiler because it doesn't allow the use of -O with -g and
280 because when it is *not* optimizing, it allocates a memory
281 location for each floating-point variable, and the memory
282 location is what gets described in the DWARF AT_location
283 attribute for the variable in question.
284 Regardless of the severe mental illness of the x86/svr4 SDB, we
285 do something sensible here and we use the following DWARF
286 register numbers. Note that these are all stack-top-relative
287 numbers.
288 11 for %st(0) (gcc regno = 8)
289 12 for %st(1) (gcc regno = 9)
290 13 for %st(2) (gcc regno = 10)
291 14 for %st(3) (gcc regno = 11)
292 15 for %st(4) (gcc regno = 12)
293 16 for %st(5) (gcc regno = 13)
294 17 for %st(6) (gcc regno = 14)
295 18 for %st(7) (gcc regno = 15)
296*/
297unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
298{
299 /* general regs */
300 0, 2, 1, 3, 6, 7, 5, 4,
301 /* fp regs */
302 11, 12, 13, 14, 15, 16, 17, 18,
303 /* arg, flags, fpsr, frame */
304 IGNORED_DWARF_REGNUM, 9,
305 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
306 /* SSE registers */
307 21, 22, 23, 24, 25, 26, 27, 28,
308 /* MMX registers */
309 29, 30, 31, 32, 33, 34, 35, 36,
310 /* extended integer registers */
311 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
312 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
313 /* extended sse registers */
314 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
315 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
316 /* AVX-512 registers 16-23 */
317 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
318 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
319 /* AVX-512 registers 24-31 */
320 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
321 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
322 /* Mask registers */
323 93, 94, 95, 96, 97, 98, 99, 100
324};
325
326/* Define parameter passing and return registers. */
327
328static int const x86_64_int_parameter_registers[6] =
329{
330 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
331};
332
333static int const x86_64_ms_abi_int_parameter_registers[4] =
334{
335 CX_REG, DX_REG, R8_REG, R9_REG
336};
337
338static int const x86_64_int_return_registers[4] =
339{
340 AX_REG, DX_REG, DI_REG, SI_REG
341};
342
343/* Define the structure for the machine field in struct function. */
344
345struct GTY(()) stack_local_entry {
346 unsigned short mode;
347 unsigned short n;
348 rtx rtl;
349 struct stack_local_entry *next;
350};
351
352/* Which cpu are we scheduling for. */
353enum attr_cpu ix86_schedule;
354
355/* Which cpu are we optimizing for. */
356enum processor_type ix86_tune;
357
358/* Which instruction set architecture to use. */
359enum processor_type ix86_arch;
360
361/* True if processor has SSE prefetch instruction. */
362unsigned char ix86_prefetch_sse;
363
364/* Preferred alignment for stack boundary in bits. */
365unsigned int ix86_preferred_stack_boundary;
366
367/* Alignment for incoming stack boundary in bits specified at
368 command line. */
369unsigned int ix86_user_incoming_stack_boundary;
370
371/* Default alignment for incoming stack boundary in bits. */
372unsigned int ix86_default_incoming_stack_boundary;
373
374/* Alignment for incoming stack boundary in bits. */
375unsigned int ix86_incoming_stack_boundary;
376
377/* True if there is no direct access to extern symbols. */
378bool ix86_has_no_direct_extern_access;
379
380/* Calling abi specific va_list type nodes. */
381tree sysv_va_list_type_node;
382tree ms_va_list_type_node;
383
384/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
385char internal_label_prefix[16];
386int internal_label_prefix_len;
387
388/* Fence to use after loop using movnt. */
389tree x86_mfence;
390
/* Register class used for passing the given 64-bit part of the argument.
These represent classes as documented by the psABI, with the exception of
the SSESF and SSEDF classes, which are basically the SSE class; GCC just
uses SFmode or DFmode moves instead of DImode moves to avoid reformatting
penalties.

Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
whenever possible (the upper half does contain padding). */
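/* For example, under the SysV psABI a struct { double d; int i; } occupies
two eightbytes: the first is classified X86_64_SSEDF_CLASS (passed in an
SSE register, moved in DFmode) and the second X86_64_INTEGERSI_CLASS
(passed in a GPR, moved in SImode since its upper half is padding).
Illustrative example only. */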
398enum x86_64_reg_class
399 {
400 X86_64_NO_CLASS,
401 X86_64_INTEGER_CLASS,
402 X86_64_INTEGERSI_CLASS,
403 X86_64_SSE_CLASS,
404 X86_64_SSEHF_CLASS,
405 X86_64_SSESF_CLASS,
406 X86_64_SSEDF_CLASS,
407 X86_64_SSEUP_CLASS,
408 X86_64_X87_CLASS,
409 X86_64_X87UP_CLASS,
410 X86_64_COMPLEX_X87_CLASS,
411 X86_64_MEMORY_CLASS
412 };
413
414#define MAX_CLASSES 8
415
416/* Table of constants used by fldpi, fldln2, etc.... */
417static REAL_VALUE_TYPE ext_80387_constants_table [5];
418static bool ext_80387_constants_init;
419
420
421static rtx ix86_function_value (const_tree, const_tree, bool);
422static bool ix86_function_value_regno_p (const unsigned int);
423static unsigned int ix86_function_arg_boundary (machine_mode,
424 const_tree);
425static rtx ix86_static_chain (const_tree, bool);
426static int ix86_function_regparm (const_tree, const_tree);
427static void ix86_compute_frame_layout (void);
428static tree ix86_canonical_va_list_type (tree);
429static unsigned int split_stack_prologue_scratch_regno (void);
430static bool i386_asm_output_addr_const_extra (FILE *, rtx);
431
432static bool ix86_can_inline_p (tree, tree);
433static unsigned int ix86_minimum_incoming_stack_boundary (bool);
434
435typedef enum ix86_flags_cc
436{
437 X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB,
438 X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE,
439 X86_CCS, X86_CCNS, X86_CCP, X86_CCNP,
440 X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE
441} ix86_cc;
442
443static const char *ix86_ccmp_dfv_mapping[] =
444{
445 "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}",
446 "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}",
447 "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}",
448 "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}"
449};
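/* The table above is indexed by the ix86_cc codes: for example,
ix86_ccmp_dfv_mapping[X86_CCE] is "{dfv=zf}" and
ix86_ccmp_dfv_mapping[X86_CCB] is "{dfv=cf}". */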
450
451
452/* Whether -mtune= or -march= were specified */
453int ix86_tune_defaulted;
454int ix86_arch_specified;
455
/* Return true if a red-zone is in use. We can't use the red-zone when
there are local indirect jumps, like "indirect_jump" or "tablejump",
which jump to another place in the function, since "call" in the
indirect thunk pushes the return address onto the stack, destroying
the red-zone.

NB: Don't use the red-zone for functions with no_caller_saved_registers
and 32 GPRs, since the 128-byte red-zone is too small for 31 GPRs.

TODO: If we can reserve the first 2 WORDs of the red-zone, one for PUSH
and another for CALL, we can allow local indirect jumps with the
indirect thunk. */
468
469bool
470ix86_using_red_zone (void)
471{
472 return (TARGET_RED_ZONE
473 && !TARGET_64BIT_MS_ABI
474 && (!TARGET_APX_EGPR
475 || (cfun->machine->call_saved_registers
476 != TYPE_NO_CALLER_SAVED_REGISTERS))
477 && (!cfun->machine->has_local_indirect_jump
478 || cfun->machine->indirect_branch_type == indirect_branch_keep));
479}
480
/* Return true if profiling code should be emitted before the prologue,
false otherwise.
Note: for x86 with "hotfix" (hot-patching) this case is sorried,
i.e. rejected with a sorry diagnostic. */
484static bool
485ix86_profile_before_prologue (void)
486{
487 return flag_fentry != 0;
488}
489
490/* Update register usage after having seen the compiler flags. */
491
492static void
493ix86_conditional_register_usage (void)
494{
495 int i, c_mask;
496
/* If there are no caller-saved registers, preserve all registers,
except fixed_regs and registers used for the function return value,
since aggregate_value_p checks call_used_regs[regno] on the return
value. */
501 if (cfun
502 && (cfun->machine->call_saved_registers
503 == TYPE_NO_CALLER_SAVED_REGISTERS))
504 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
505 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
506 call_used_regs[i] = 0;
507
508 /* For 32-bit targets, disable the REX registers. */
509 if (! TARGET_64BIT)
510 {
for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
CLEAR_HARD_REG_BIT (accessible_reg_set, i);
for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
CLEAR_HARD_REG_BIT (accessible_reg_set, i);
for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
CLEAR_HARD_REG_BIT (accessible_reg_set, i);
517 }
518
519 /* See the definition of CALL_USED_REGISTERS in i386.h. */
520 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
521
522 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
523
524 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
525 {
526 /* Set/reset conditionally defined registers from
527 CALL_USED_REGISTERS initializer. */
528 if (call_used_regs[i] > 1)
529 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
530
531 /* Calculate registers of CLOBBERED_REGS register set
532 as call used registers from GENERAL_REGS register set. */
if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
&& call_used_regs[i])
SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
536 }
537
538 /* If MMX is disabled, disable the registers. */
539 if (! TARGET_MMX)
540 accessible_reg_set &= ~reg_class_contents[MMX_REGS];
541
542 /* If SSE is disabled, disable the registers. */
543 if (! TARGET_SSE)
544 accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
545
546 /* If the FPU is disabled, disable the registers. */
547 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
548 accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
549
550 /* If AVX512F is disabled, disable the registers. */
551 if (! TARGET_AVX512F)
552 {
553 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
CLEAR_HARD_REG_BIT (accessible_reg_set, i);
555
556 accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
557 }
558
559 /* If APX is disabled, disable the registers. */
560 if (! (TARGET_APX_EGPR && TARGET_64BIT))
561 {
562 for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
CLEAR_HARD_REG_BIT (accessible_reg_set, i);
564 }
565}
566
567/* Canonicalize a comparison from one we don't have to one we do have. */
568
569static void
570ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
571 bool op0_preserve_value)
572{
573 /* The order of operands in x87 ficom compare is forced by combine in
574 simplify_comparison () function. Float operator is treated as RTX_OBJ
575 with a precedence over other operators and is always put in the first
576 place. Swap condition and operands to match ficom instruction. */
577 if (!op0_preserve_value
578 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
579 {
580 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
581
582 /* We are called only for compares that are split to SAHF instruction.
583 Ensure that we have setcc/jcc insn for the swapped condition. */
584 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
585 {
std::swap (*op0, *op1);
587 *code = (int) scode;
588 return;
589 }
590 }
591
592 /* Swap operands of GTU comparison to canonicalize
593 addcarry/subborrow comparison. */
594 if (!op0_preserve_value
595 && *code == GTU
596 && GET_CODE (*op0) == PLUS
597 && ix86_carry_flag_operator (XEXP (*op0, 0), VOIDmode)
598 && GET_CODE (XEXP (*op0, 1)) == ZERO_EXTEND
599 && GET_CODE (*op1) == ZERO_EXTEND)
600 {
std::swap (*op0, *op1);
602 *code = (int) swap_condition ((enum rtx_code) *code);
603 return;
604 }
605}
606
607/* Hook to determine if one function can safely inline another. */
608
609static bool
610ix86_can_inline_p (tree caller, tree callee)
611{
612 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
613 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
614
/* Changes of these flags can be tolerated for always_inline functions.
Let's hope the user knows what they are doing. */
617 unsigned HOST_WIDE_INT always_inline_safe_mask
618 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
619 | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
620 | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
621 | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
622 | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
623 | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
624 | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
625
626
627 if (!callee_tree)
628 callee_tree = target_option_default_node;
629 if (!caller_tree)
630 caller_tree = target_option_default_node;
631 if (callee_tree == caller_tree)
632 return true;
633
634 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
635 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
636 bool ret = false;
637 bool always_inline
638 = (DECL_DISREGARD_INLINE_LIMITS (callee)
&& lookup_attribute ("always_inline",
640 DECL_ATTRIBUTES (callee)));
641
642 /* If callee only uses GPRs, ignore MASK_80387. */
643 if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
644 always_inline_safe_mask |= MASK_80387;
645
cgraph_node *callee_node = cgraph_node::get (callee);
/* Callee's isa options should be a subset of the caller's, i.e. an SSE4
function can inline an SSE2 function but an SSE2 function can't inline
an SSE4 function. */
650 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
651 != callee_opts->x_ix86_isa_flags)
652 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
653 != callee_opts->x_ix86_isa_flags2))
654 ret = false;
655
656 /* See if we have the same non-isa options. */
657 else if ((!always_inline
658 && caller_opts->x_target_flags != callee_opts->x_target_flags)
659 || (caller_opts->x_target_flags & ~always_inline_safe_mask)
660 != (callee_opts->x_target_flags & ~always_inline_safe_mask))
661 ret = false;
662
else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
/* If the callee doesn't use FP expressions, differences in
ix86_fpmath can be ignored. We are called from FEs
for multi-versioning call optimization, so beware of
ipa_fn_summaries not being available. */
&& (! ipa_fn_summaries
|| ipa_fn_summaries->get (callee_node) == NULL
|| ipa_fn_summaries->get (callee_node)->fp_expressions))
671 ret = false;
672
673 /* At this point we cannot identify whether arch or tune setting
674 comes from target attribute or not. So the most conservative way
675 is to allow the callee that uses default arch and tune string to
676 be inlined. */
else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
&& !strcmp (callee_opts->x_ix86_tune_string, "generic"))
679 ret = true;
680
/* See if arch, tune, etc. are the same. As the previous ISA flag check
already ensures that the callee's ISA is a subset of the caller's, do
not block the always_inline attribute for the callee even if it has a
different arch. */
684 else if (!always_inline && caller_opts->arch != callee_opts->arch)
685 ret = false;
686
687 else if (!always_inline && caller_opts->tune != callee_opts->tune)
688 ret = false;
689
690 else if (!always_inline
691 && caller_opts->branch_cost != callee_opts->branch_cost)
692 ret = false;
693
694 else
695 ret = true;
696
697 return ret;
698}
699
700/* Return true if this goes in large data/bss. */
701
702static bool
703ix86_in_large_data_p (tree exp)
704{
705 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC
706 && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC)
707 return false;
708
709 if (exp == NULL_TREE)
710 return false;
711
712 /* Functions are never large data. */
713 if (TREE_CODE (exp) == FUNCTION_DECL)
714 return false;
715
716 /* Automatic variables are never large data. */
if (VAR_P (exp) && !is_global_var (exp))
718 return false;
719
720 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
721 {
722 const char *section = DECL_SECTION_NAME (exp);
if (strcmp (section, ".ldata") == 0
|| strcmp (section, ".lbss") == 0)
725 return true;
726 return false;
727 }
728 else
729 {
730 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
731
/* If this is an incomplete type with size 0, then we can't put it
in data because it might be too big when completed. Also,
int_size_in_bytes returns -1 if the size can vary or is larger than
an integer, in which case it is also safer to assume that it goes in
large data. */
737 if (size <= 0 || size > ix86_section_threshold)
738 return true;
739 }
740
741 return false;
742}
743
744/* i386-specific section flag to mark large sections. */
745#define SECTION_LARGE SECTION_MACH_DEP
746
747/* Switch to the appropriate section for output of DECL.
748 DECL is either a `VAR_DECL' node or a constant of some sort.
749 RELOC indicates whether forming the initial value of DECL requires
750 link-time relocations. */
751
752ATTRIBUTE_UNUSED static section *
753x86_64_elf_select_section (tree decl, int reloc,
754 unsigned HOST_WIDE_INT align)
755{
if (ix86_in_large_data_p (decl))
757 {
758 const char *sname = NULL;
759 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
760 switch (categorize_decl_for_section (decl, reloc))
761 {
762 case SECCAT_DATA:
763 sname = ".ldata";
764 break;
765 case SECCAT_DATA_REL:
766 sname = ".ldata.rel";
767 break;
768 case SECCAT_DATA_REL_LOCAL:
769 sname = ".ldata.rel.local";
770 break;
771 case SECCAT_DATA_REL_RO:
772 sname = ".ldata.rel.ro";
773 break;
774 case SECCAT_DATA_REL_RO_LOCAL:
775 sname = ".ldata.rel.ro.local";
776 break;
777 case SECCAT_BSS:
778 sname = ".lbss";
779 flags |= SECTION_BSS;
780 break;
781 case SECCAT_RODATA:
782 case SECCAT_RODATA_MERGE_STR:
783 case SECCAT_RODATA_MERGE_STR_INIT:
784 case SECCAT_RODATA_MERGE_CONST:
785 sname = ".lrodata";
786 flags &= ~SECTION_WRITE;
787 break;
788 case SECCAT_SRODATA:
789 case SECCAT_SDATA:
790 case SECCAT_SBSS:
791 gcc_unreachable ();
792 case SECCAT_TEXT:
793 case SECCAT_TDATA:
794 case SECCAT_TBSS:
/* We don't split these for the medium model. Place them into
default sections and hope for the best. */
797 break;
798 }
799 if (sname)
800 {
801 /* We might get called with string constants, but get_named_section
802 doesn't like them as they are not DECLs. Also, we need to set
803 flags in that case. */
804 if (!DECL_P (decl))
805 return get_section (sname, flags, NULL);
806 return get_named_section (decl, sname, reloc);
807 }
808 }
809 return default_elf_select_section (decl, reloc, align);
810}
811
812/* Select a set of attributes for section NAME based on the properties
813 of DECL and whether or not RELOC indicates that DECL's initializer
814 might contain runtime relocations. */
815
816static unsigned int ATTRIBUTE_UNUSED
817x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
818{
819 unsigned int flags = default_section_type_flags (decl, name, reloc);
820
if (ix86_in_large_data_p (decl))
822 flags |= SECTION_LARGE;
823
824 if (decl == NULL_TREE
&& (strcmp (name, ".ldata.rel.ro") == 0
|| strcmp (name, ".ldata.rel.ro.local") == 0))
827 flags |= SECTION_RELRO;
828
if (strcmp (name, ".lbss") == 0
|| startswith (name, ".lbss.")
|| startswith (name, ".gnu.linkonce.lb."))
832 flags |= SECTION_BSS;
833
834 return flags;
835}
836
837/* Build up a unique section name, expressed as a
838 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
839 RELOC indicates whether the initial value of EXP requires
840 link-time relocations. */
841
842static void ATTRIBUTE_UNUSED
843x86_64_elf_unique_section (tree decl, int reloc)
844{
if (ix86_in_large_data_p (decl))
846 {
847 const char *prefix = NULL;
848 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
849 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
850
851 switch (categorize_decl_for_section (decl, reloc))
852 {
853 case SECCAT_DATA:
854 case SECCAT_DATA_REL:
855 case SECCAT_DATA_REL_LOCAL:
856 case SECCAT_DATA_REL_RO:
857 case SECCAT_DATA_REL_RO_LOCAL:
858 prefix = one_only ? ".ld" : ".ldata";
859 break;
860 case SECCAT_BSS:
861 prefix = one_only ? ".lb" : ".lbss";
862 break;
863 case SECCAT_RODATA:
864 case SECCAT_RODATA_MERGE_STR:
865 case SECCAT_RODATA_MERGE_STR_INIT:
866 case SECCAT_RODATA_MERGE_CONST:
867 prefix = one_only ? ".lr" : ".lrodata";
868 break;
869 case SECCAT_SRODATA:
870 case SECCAT_SDATA:
871 case SECCAT_SBSS:
872 gcc_unreachable ();
873 case SECCAT_TEXT:
874 case SECCAT_TDATA:
875 case SECCAT_TBSS:
/* We don't split these for the medium model. Place them into
default sections and hope for the best. */
878 break;
879 }
880 if (prefix)
881 {
882 const char *name, *linkonce;
883 char *string;
884
885 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
886 name = targetm.strip_name_encoding (name);
887
888 /* If we're using one_only, then there needs to be a .gnu.linkonce
889 prefix to the section name. */
890 linkonce = one_only ? ".gnu.linkonce" : "";
891
892 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
893
894 set_decl_section_name (decl, string);
895 return;
896 }
897 }
898 default_unique_section (decl, reloc);
899}
900
901#ifdef COMMON_ASM_OP
902
903#ifndef LARGECOMM_SECTION_ASM_OP
904#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
905#endif
906
/* This says how to output assembler code to declare an
uninitialized external linkage data object.

For the medium model on x86-64 we need to use the LARGECOMM_SECTION_ASM_OP
directive for large objects. */
912void
913x86_elf_aligned_decl_common (FILE *file, tree decl,
914 const char *name, unsigned HOST_WIDE_INT size,
915 unsigned align)
916{
917 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
918 || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
919 && size > (unsigned int)ix86_section_threshold)
920 {
921 switch_to_section (get_named_section (decl, ".lbss", 0));
fputs (LARGECOMM_SECTION_ASM_OP, file);
923 }
924 else
fputs (COMMON_ASM_OP, file);
926 assemble_name (file, name);
fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
928 size, align / BITS_PER_UNIT);
929}
930#endif
931
932/* Utility function for targets to use in implementing
933 ASM_OUTPUT_ALIGNED_BSS. */
934
935void
936x86_output_aligned_bss (FILE *file, tree decl, const char *name,
937 unsigned HOST_WIDE_INT size, unsigned align)
938{
939 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
940 || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
941 && size > (unsigned int)ix86_section_threshold)
942 switch_to_section (get_named_section (decl, ".lbss", 0));
943 else
944 switch_to_section (bss_section);
945 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
946#ifdef ASM_DECLARE_OBJECT_NAME
947 last_assemble_variable_decl = decl;
948 ASM_DECLARE_OBJECT_NAME (file, name, decl);
949#else
950 /* Standard thing is just output label for the object. */
951 ASM_OUTPUT_LABEL (file, name);
952#endif /* ASM_DECLARE_OBJECT_NAME */
953 ASM_OUTPUT_SKIP (file, size ? size : 1);
954}
955
956/* Decide whether we must probe the stack before any space allocation
957 on this target. It's essentially TARGET_STACK_PROBE except when
958 -fstack-check causes the stack to be already probed differently. */
959
960bool
961ix86_target_stack_probe (void)
962{
963 /* Do not probe the stack twice if static stack checking is enabled. */
964 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
965 return false;
966
967 return TARGET_STACK_PROBE;
968}
969
970/* Decide whether we can make a sibling call to a function. DECL is the
971 declaration of the function being targeted by the call and EXP is the
972 CALL_EXPR representing the call. */
973
974static bool
975ix86_function_ok_for_sibcall (tree decl, tree exp)
976{
977 tree type, decl_or_type;
978 rtx a, b;
979 bool bind_global = decl && !targetm.binds_local_p (decl);
980
if (ix86_function_naked (current_function_decl))
982 return false;
983
984 /* Sibling call isn't OK if there are no caller-saved registers
985 since all registers must be preserved before return. */
986 if (cfun->machine->call_saved_registers
987 == TYPE_NO_CALLER_SAVED_REGISTERS)
988 return false;
989
990 /* If we are generating position-independent code, we cannot sibcall
991 optimize direct calls to global functions, as the PLT requires
992 %ebx be live. (Darwin does not have a PLT.) */
993 if (!TARGET_MACHO
994 && !TARGET_64BIT
995 && flag_pic
996 && flag_plt
997 && bind_global)
998 return false;
999
1000 /* If we need to align the outgoing stack, then sibcalling would
1001 unalign the stack, which may break the called function. */
1002 if (ix86_minimum_incoming_stack_boundary (true)
1003 < PREFERRED_STACK_BOUNDARY)
1004 return false;
1005
1006 if (decl)
1007 {
1008 decl_or_type = decl;
1009 type = TREE_TYPE (decl);
1010 }
1011 else
1012 {
1013 /* We're looking at the CALL_EXPR, we need the type of the function. */
1014 type = CALL_EXPR_FN (exp); /* pointer expression */
1015 type = TREE_TYPE (type); /* pointer type */
1016 type = TREE_TYPE (type); /* function type */
1017 decl_or_type = type;
1018 }
1019
1020 /* Sibling call isn't OK if callee has no callee-saved registers
1021 and the calling function has callee-saved registers. */
1022 if (cfun->machine->call_saved_registers != TYPE_NO_CALLEE_SAVED_REGISTERS
1023 && (cfun->machine->call_saved_registers
1024 != TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP)
&& lookup_attribute ("no_callee_saved_registers",
1026 TYPE_ATTRIBUTES (type)))
1027 return false;
1028
1029 /* If outgoing reg parm stack space changes, we cannot do sibcall. */
1030 if ((OUTGOING_REG_PARM_STACK_SPACE (type)
1031 != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
1032 || (REG_PARM_STACK_SPACE (decl_or_type)
1033 != REG_PARM_STACK_SPACE (current_function_decl)))
1034 {
1035 maybe_complain_about_tail_call (exp,
1036 "inconsistent size of stack space"
1037 " allocated for arguments which are"
1038 " passed in registers");
1039 return false;
1040 }
1041
/* Check that the return value locations are the same. For example,
if we are returning floats on the 80387 register stack, we cannot
1044 make a sibcall from a function that doesn't return a float to a
1045 function that does or, conversely, from a function that does return
1046 a float to a function that doesn't; the necessary stack adjustment
1047 would not be executed. This is also the place we notice
1048 differences in the return value ABI. Note that it is ok for one
1049 of the functions to have void return type as long as the return
1050 value of the other is passed in a register. */
1051 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
1052 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1053 cfun->decl, false);
1054 if (STACK_REG_P (a) || STACK_REG_P (b))
1055 {
1056 if (!rtx_equal_p (a, b))
1057 return false;
1058 }
1059 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
1060 ;
1061 else if (!rtx_equal_p (a, b))
1062 return false;
1063
1064 if (TARGET_64BIT)
1065 {
1066 /* The SYSV ABI has more call-clobbered registers;
1067 disallow sibcalls from MS to SYSV. */
1068 if (cfun->machine->call_abi == MS_ABI
1069 && ix86_function_type_abi (type) == SYSV_ABI)
1070 return false;
1071 }
1072 else
1073 {
/* If this call is indirect, we'll need to be able to use a
call-clobbered register for the address of the target function.
Make sure that all such registers are not used for passing
parameters. Note that DLLIMPORT functions and calls to global
functions via GOT slots are indirect. */
1079 if (!decl
1080 || (bind_global && flag_pic && !flag_plt)
1081 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
1082 || flag_force_indirect_call)
1083 {
1084 /* Check if regparm >= 3 since arg_reg_available is set to
1085 false if regparm == 0. If regparm is 1 or 2, there is
1086 always a call-clobbered register available.
1087
1088 ??? The symbol indirect call doesn't need a call-clobbered
1089 register. But we don't know if this is a symbol indirect
1090 call or not here. */
1091 if (ix86_function_regparm (type, decl) >= 3
1092 && !cfun->machine->arg_reg_available)
1093 return false;
1094 }
1095 }
1096
1097 if (decl && ix86_use_pseudo_pic_reg ())
1098 {
1099 /* When PIC register is used, it must be restored after ifunc
1100 function returns. */
1101 cgraph_node *node = cgraph_node::get (decl);
1102 if (node && node->ifunc_resolver)
1103 return false;
1104 }
1105
1106 /* Disable sibcall if callee has indirect_return attribute and
1107 caller doesn't since callee will return to the caller's caller
1108 via an indirect jump. */
1109 if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
1110 == (CF_RETURN | CF_BRANCH))
&& lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
&& !lookup_attribute ("indirect_return",
1113 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
1114 return false;
1115
1116 /* Otherwise okay. That also includes certain types of indirect calls. */
1117 return true;
1118}
1119
1120/* This function determines from TYPE the calling-convention. */
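/* For instance, on ia32 a prototype declared with __attribute__ ((fastcall))
yields IX86_CALLCVT_FASTCALL below, while 64-bit targets always get
IX86_CALLCVT_CDECL. */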
1121
1122unsigned int
1123ix86_get_callcvt (const_tree type)
1124{
1125 unsigned int ret = 0;
1126 bool is_stdarg;
1127 tree attrs;
1128
1129 if (TARGET_64BIT)
1130 return IX86_CALLCVT_CDECL;
1131
1132 attrs = TYPE_ATTRIBUTES (type);
1133 if (attrs != NULL_TREE)
1134 {
if (lookup_attribute ("cdecl", attrs))
ret |= IX86_CALLCVT_CDECL;
else if (lookup_attribute ("stdcall", attrs))
ret |= IX86_CALLCVT_STDCALL;
else if (lookup_attribute ("fastcall", attrs))
ret |= IX86_CALLCVT_FASTCALL;
else if (lookup_attribute ("thiscall", attrs))
ret |= IX86_CALLCVT_THISCALL;

/* Regparm isn't allowed for thiscall and fastcall. */
if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
{
if (lookup_attribute ("regparm", attrs))
ret |= IX86_CALLCVT_REGPARM;
if (lookup_attribute ("sseregparm", attrs))
ret |= IX86_CALLCVT_SSEREGPARM;
1151 }
1152
1153 if (IX86_BASE_CALLCVT(ret) != 0)
1154 return ret;
1155 }
1156
1157 is_stdarg = stdarg_p (type);
1158 if (TARGET_RTD && !is_stdarg)
1159 return IX86_CALLCVT_STDCALL | ret;
1160
1161 if (ret != 0
1162 || is_stdarg
1163 || TREE_CODE (type) != METHOD_TYPE
1164 || ix86_function_type_abi (type) != MS_ABI)
1165 return IX86_CALLCVT_CDECL | ret;
1166
1167 return IX86_CALLCVT_THISCALL;
1168}
1169
1170/* Return 0 if the attributes for two types are incompatible, 1 if they
1171 are compatible, and 2 if they are nearly compatible (which causes a
1172 warning to be generated). */
1173
1174static int
1175ix86_comp_type_attributes (const_tree type1, const_tree type2)
1176{
1177 unsigned int ccvt1, ccvt2;
1178
1179 if (TREE_CODE (type1) != FUNCTION_TYPE
1180 && TREE_CODE (type1) != METHOD_TYPE)
1181 return 1;
1182
ccvt1 = ix86_get_callcvt (type1);
ccvt2 = ix86_get_callcvt (type2);
1185 if (ccvt1 != ccvt2)
1186 return 0;
1187 if (ix86_function_regparm (type1, NULL)
1188 != ix86_function_regparm (type2, NULL))
1189 return 0;
1190
if (lookup_attribute ("no_callee_saved_registers",
TYPE_ATTRIBUTES (type1))
!= lookup_attribute ("no_callee_saved_registers",
TYPE_ATTRIBUTES (type2)))
1195 return 0;
1196
1197 return 1;
1198}
1199
1200/* Return the regparm value for a function with the indicated TYPE and DECL.
1201 DECL may be NULL when calling function indirectly
1202 or considering a libcall. */
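/* For example, on ia32 __attribute__ ((regparm (3))) passes the first three
integer arguments in %eax, %edx and %ecx instead of on the stack
(illustrative; the actual limit used below is REGPARM_MAX). */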
1203
1204static int
1205ix86_function_regparm (const_tree type, const_tree decl)
1206{
1207 tree attr;
1208 int regparm;
1209 unsigned int ccvt;
1210
1211 if (TARGET_64BIT)
1212 return (ix86_function_type_abi (type) == SYSV_ABI
1213 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1214 ccvt = ix86_get_callcvt (type);
1215 regparm = ix86_regparm;
1216
1217 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1218 {
attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1220 if (attr)
1221 {
1222 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1223 return regparm;
1224 }
1225 }
1226 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1227 return 2;
1228 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1229 return 1;
1230
1231 /* Use register calling convention for local functions when possible. */
1232 if (decl
1233 && TREE_CODE (decl) == FUNCTION_DECL)
1234 {
1235 cgraph_node *target = cgraph_node::get (decl);
1236 if (target)
1237 target = target->function_symbol ();
1238
/* Caller and callee must agree on the calling convention, so
checking just `optimize' here would mean that with
__attribute__((optimize (...))) the caller could use the regparm
convention and the callee not, or vice versa. Instead look at
whether the callee is optimized or not. */
1244 if (target && opt_for_fn (target->decl, optimize)
1245 && !(profile_flag && !flag_fentry))
1246 {
1247 if (target->local && target->can_change_signature)
1248 {
1249 int local_regparm, globals = 0, regno;
1250
1251 /* Make sure no regparm register is taken by a
1252 fixed register variable. */
1253 for (local_regparm = 0; local_regparm < REGPARM_MAX;
1254 local_regparm++)
1255 if (fixed_regs[local_regparm])
1256 break;
1257
1258 /* We don't want to use regparm(3) for nested functions as
1259 these use a static chain pointer in the third argument. */
1260 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1261 local_regparm = 2;
1262
1263 /* Save a register for the split stack. */
1264 if (flag_split_stack)
1265 {
1266 if (local_regparm == 3)
1267 local_regparm = 2;
1268 else if (local_regparm == 2
1269 && DECL_STATIC_CHAIN (target->decl))
1270 local_regparm = 1;
1271 }
1272
/* Each fixed register usage increases register pressure,
so fewer registers should be used for argument passing.
This functionality can be overridden by an explicit
regparm value. */
1277 for (regno = AX_REG; regno <= DI_REG; regno++)
1278 if (fixed_regs[regno])
1279 globals++;
1280
1281 local_regparm
1282 = globals < local_regparm ? local_regparm - globals : 0;
1283
1284 if (local_regparm > regparm)
1285 regparm = local_regparm;
1286 }
1287 }
1288 }
1289
1290 return regparm;
1291}
1292
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
DFmode (2) arguments in SSE registers for a function with the
indicated TYPE and DECL. DECL may be NULL when calling a function
indirectly or considering a libcall. Return -1 if any FP parameter
should be rejected with an error. This is used in situations where we
imply the SSE calling convention but the function is called from
another function with SSE disabled. Otherwise return 0. */
1300
1301static int
1302ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1303{
1304 gcc_assert (!TARGET_64BIT);
1305
1306 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1307 by the sseregparm attribute. */
1308 if (TARGET_SSEREGPARM
|| (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1310 {
1311 if (!TARGET_SSE)
1312 {
1313 if (warn)
1314 {
1315 if (decl)
1316 error ("calling %qD with attribute sseregparm without "
1317 "SSE/SSE2 enabled", decl);
1318 else
1319 error ("calling %qT with attribute sseregparm without "
1320 "SSE/SSE2 enabled", type);
1321 }
1322 return 0;
1323 }
1324
1325 return 2;
1326 }
1327
1328 if (!decl)
1329 return 0;
1330
1331 cgraph_node *target = cgraph_node::get (decl);
1332 if (target)
1333 target = target->function_symbol ();
1334
1335 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1336 (and DFmode for SSE2) arguments in SSE registers. */
1337 if (target
1338 /* TARGET_SSE_MATH */
&& (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1340 && opt_for_fn (target->decl, optimize)
1341 && !(profile_flag && !flag_fentry))
1342 {
1343 if (target->local && target->can_change_signature)
1344 {
/* Refuse to produce wrong code when a local function with SSE enabled
is called from an SSE-disabled function.
FIXME: We need a way to detect these cases across ltrans partitions
and avoid using SSE calling conventions on local functions called
from functions with SSE disabled. For now at least delay the
warning until we know we are going to produce wrong code.
See PR66047. */
1352 if (!TARGET_SSE && warn)
1353 return -1;
1354 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1355 ->x_ix86_isa_flags) ? 2 : 1;
1356 }
1357 }
1358
1359 return 0;
1360}
1361
1362/* Return true if EAX is live at the start of the function. Used by
1363 ix86_expand_prologue to determine if we need special help before
1364 calling allocate_stack_worker. */
1365
1366static bool
1367ix86_eax_live_at_start_p (void)
1368{
1369 /* Cheat. Don't bother working forward from ix86_function_regparm
1370 to the function type to whether an actual argument is located in
1371 eax. Instead just look at cfg info, which is still close enough
1372 to correct at this point. This gives false positives for broken
1373 functions that might use uninitialized data that happens to be
1374 allocated in eax, but who cares? */
1375 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1376}
1377
1378static bool
1379ix86_keep_aggregate_return_pointer (tree fntype)
1380{
1381 tree attr;
1382
1383 if (!TARGET_64BIT)
1384 {
attr = lookup_attribute ("callee_pop_aggregate_return",
TYPE_ATTRIBUTES (fntype));
1387 if (attr)
1388 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1389
1390 /* For 32-bit MS-ABI the default is to keep aggregate
1391 return pointer. */
1392 if (ix86_function_type_abi (fntype) == MS_ABI)
1393 return true;
1394 }
1395 return KEEP_AGGREGATE_RETURN_POINTER != 0;
1396}
1397
1398/* Value is the number of bytes of arguments automatically
1399 popped when returning from a subroutine call.
1400 FUNDECL is the declaration node of the function (as a tree),
1401 FUNTYPE is the data type of the function (as a tree),
1402 or for a library call it is an identifier node for the subroutine name.
1403 SIZE is the number of bytes of arguments passed on the stack.
1404
1405 On the 80386, the RTD insn may be used to pop them if the number
1406 of args is fixed, but if the number is variable then the caller
1407 must pop them all. RTD can't be used for library calls now
1408 because the library is compiled with the Unix compiler.
1409 Use of RTD is a selectable option, since it is incompatible with
1410 standard Unix calling sequences. If the option is not selected,
1411 the caller must always pop the args.
1412
1413 The attribute stdcall is equivalent to RTD on a per module basis. */
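/* For example, an ia32 stdcall function taking two int arguments (SIZE == 8)
returns with "ret $8", so this hook reports 8 popped bytes, while a cdecl
function returns with a plain "ret" and reports 0. */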
1414
1415static poly_int64
1416ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1417{
1418 unsigned int ccvt;
1419
1420 /* None of the 64-bit ABIs pop arguments. */
1421 if (TARGET_64BIT)
1422 return 0;
1423
ccvt = ix86_get_callcvt (funtype);
1425
1426 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1427 | IX86_CALLCVT_THISCALL)) != 0
1428 && ! stdarg_p (funtype))
1429 return size;
1430
1431 /* Lose any fake structure return argument if it is passed on the stack. */
if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
&& !ix86_keep_aggregate_return_pointer (funtype))
{
int nregs = ix86_function_regparm (funtype, fundecl);
1436 if (nregs == 0)
1437 return GET_MODE_SIZE (Pmode);
1438 }
1439
1440 return 0;
1441}
1442
1443/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1444
1445static bool
1446ix86_legitimate_combined_insn (rtx_insn *insn)
1447{
1448 int i;
1449
1450 /* Check operand constraints in case hard registers were propagated
1451 into insn pattern. This check prevents combine pass from
1452 generating insn patterns with invalid hard register operands.
1453 These invalid insns can eventually confuse reload to error out
1454 with a spill failure. See also PRs 46829 and 46843. */
1455
1456 gcc_assert (INSN_CODE (insn) >= 0);
1457
1458 extract_insn (insn);
1459 preprocess_constraints (insn);
1460
1461 int n_operands = recog_data.n_operands;
1462 int n_alternatives = recog_data.n_alternatives;
1463 for (i = 0; i < n_operands; i++)
1464 {
1465 rtx op = recog_data.operand[i];
1466 machine_mode mode = GET_MODE (op);
1467 const operand_alternative *op_alt;
1468 int offset = 0;
1469 bool win;
1470 int j;
1471
1472 /* A unary operator may be accepted by the predicate, but it
1473 is irrelevant for matching constraints. */
1474 if (UNARY_P (op))
1475 op = XEXP (op, 0);
1476
1477 if (SUBREG_P (op))
1478 {
1479 if (REG_P (SUBREG_REG (op))
1480 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1481 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1482 GET_MODE (SUBREG_REG (op)),
1483 SUBREG_BYTE (op),
1484 GET_MODE (op));
1485 op = SUBREG_REG (op);
1486 }
1487
1488 if (!(REG_P (op) && HARD_REGISTER_P (op)))
1489 continue;
1490
1491 op_alt = recog_op_alt;
1492
1493 /* Operand has no constraints, anything is OK. */
1494 win = !n_alternatives;
1495
1496 alternative_mask preferred = get_preferred_alternatives (insn);
1497 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1498 {
1499 if (!TEST_BIT (preferred, j))
1500 continue;
1501 if (op_alt[i].anything_ok
1502 || (op_alt[i].matches != -1
1503 && operands_match_p
1504 (recog_data.operand[i],
1505 recog_data.operand[op_alt[i].matches]))
1506 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1507 {
1508 win = true;
1509 break;
1510 }
1511 }
1512
1513 if (!win)
1514 return false;
1515 }
1516
1517 return true;
1518}
1519
1520/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1521
1522static unsigned HOST_WIDE_INT
1523ix86_asan_shadow_offset (void)
1524{
1525 return SUBTARGET_SHADOW_OFFSET;
1526}
1527
1528/* Argument support functions. */
1529
1530/* Return true when register may be used to pass function parameters. */
1531bool
1532ix86_function_arg_regno_p (int regno)
1533{
1534 int i;
1535 enum calling_abi call_abi;
1536 const int *parm_regs;
1537
1538 if (TARGET_SSE && SSE_REGNO_P (regno)
1539 && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
1540 return true;
1541
1542 if (!TARGET_64BIT)
1543 return (regno < REGPARM_MAX
1544 || (TARGET_MMX && MMX_REGNO_P (regno)
1545 && regno < FIRST_MMX_REG + MMX_REGPARM_MAX));
1546
1547 /* TODO: The function should depend on current function ABI but
1548 builtins.cc would need updating then. Therefore we use the
1549 default ABI. */
1550 call_abi = ix86_cfun_abi ();
1551
1552 /* RAX is used as hidden argument to va_arg functions. */
1553 if (call_abi == SYSV_ABI && regno == AX_REG)
1554 return true;
1555
1556 if (call_abi == MS_ABI)
1557 parm_regs = x86_64_ms_abi_int_parameter_registers;
1558 else
1559 parm_regs = x86_64_int_parameter_registers;
1560
1561 for (i = 0; i < (call_abi == MS_ABI
1562 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1563 if (regno == parm_regs[i])
1564 return true;
1565 return false;
1566}
1567
/* Return true if we do not know how to pass ARG solely in registers. */
1569
1570static bool
1571ix86_must_pass_in_stack (const function_arg_info &arg)
1572{
1573 if (must_pass_in_stack_var_size_or_pad (arg))
1574 return true;
1575
1576 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1577 The layout_type routine is crafty and tries to trick us into passing
1578 currently unsupported vector types on the stack by using TImode. */
1579 return (!TARGET_64BIT && arg.mode == TImode
1580 && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1581}
1582
/* Return the size, in bytes, of the area reserved for arguments passed
in registers for the function represented by FNDECL, depending on the
ABI used. */
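/* For example, a 64-bit ms_abi function reserves the 32-byte "shadow space"
(four 8-byte home slots for %rcx, %rdx, %r8 and %r9), while the SysV ABI
reserves none. */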
1586int
1587ix86_reg_parm_stack_space (const_tree fndecl)
1588{
1589 enum calling_abi call_abi = SYSV_ABI;
1590 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1591 call_abi = ix86_function_abi (fndecl);
1592 else
1593 call_abi = ix86_function_type_abi (fndecl);
1594 if (TARGET_64BIT && call_abi == MS_ABI)
1595 return 32;
1596 return 0;
1597}
1598
1599/* We add this as a workaround in order to use libc_has_function
1600 hook in i386.md. */
1601bool
1602ix86_libc_has_function (enum function_class fn_class)
1603{
1604 return targetm.libc_has_function (fn_class, NULL_TREE);
1605}
1606
/* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
specifying the call ABI used. */
1609enum calling_abi
1610ix86_function_type_abi (const_tree fntype)
1611{
1612 enum calling_abi abi = ix86_abi;
1613
1614 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1615 return abi;
1616
1617 if (abi == SYSV_ABI
&& lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1619 {
1620 static int warned;
1621 if (TARGET_X32 && !warned)
1622 {
1623 error ("X32 does not support %<ms_abi%> attribute");
1624 warned = 1;
1625 }
1626
1627 abi = MS_ABI;
1628 }
1629 else if (abi == MS_ABI
&& lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1631 abi = SYSV_ABI;
1632
1633 return abi;
1634}
1635
1636enum calling_abi
1637ix86_function_abi (const_tree fndecl)
1638{
1639 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1640}
1641
/* Return SYSV_ABI or MS_ABI, depending on cfun,
specifying the call ABI used. */
1644enum calling_abi
1645ix86_cfun_abi (void)
1646{
1647 return cfun ? cfun->machine->call_abi : ix86_abi;
1648}
1649
1650bool
1651ix86_function_ms_hook_prologue (const_tree fn)
1652{
if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1654 {
1655 if (decl_function_context (fn) != NULL_TREE)
1656 error_at (DECL_SOURCE_LOCATION (fn),
1657 "%<ms_hook_prologue%> attribute is not compatible "
1658 "with nested function");
1659 else
1660 return true;
1661 }
1662 return false;
1663}
1664
1665bool
1666ix86_function_naked (const_tree fn)
1667{
if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1669 return true;
1670
1671 return false;
1672}
1673
1674/* Write the extra assembler code needed to declare a function properly. */
1675
1676void
1677ix86_asm_output_function_label (FILE *out_file, const char *fname,
1678 tree decl)
1679{
bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1681
1682 if (cfun)
1683 cfun->machine->function_label_emitted = true;
1684
1685 if (is_ms_hook)
1686 {
1687 int i, filler_count = (TARGET_64BIT ? 32 : 16);
1688 unsigned int filler_cc = 0xcccccccc;
1689
1690 for (i = 0; i < filler_count; i += 4)
fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
1692 }
1693
1694#ifdef SUBTARGET_ASM_UNWIND_INIT
1695 SUBTARGET_ASM_UNWIND_INIT (out_file);
1696#endif
1697
1698 assemble_function_label_raw (out_file, fname);
1699
1700 /* Output magic byte marker, if hot-patch attribute is set. */
1701 if (is_ms_hook)
1702 {
1703 if (TARGET_64BIT)
1704 {
1705 /* leaq [%rsp + 0], %rsp */
fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
out_file);
1708 }
1709 else
1710 {
1711 /* movl.s %edi, %edi
1712 push %ebp
1713 movl.s %esp, %ebp */
	  fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
1715 }
1716 }
1717}
1718
1719/* Output a user-defined label. In AT&T syntax, registers are prefixed
1720 with %, so labels require no punctuation. In Intel syntax, registers
1721 are unprefixed, so labels may clash with registers or other operators,
1722 and require quoting. */
1723void
1724ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label)
1725{
1726 if (ASSEMBLER_DIALECT == ASM_ATT)
    fprintf (file, "%s%s", prefix, label);
  else
    fprintf (file, "\"%s%s\"", prefix, label);
1730}
1731
/* Implementation of the call ABI switching target hook.  Set the call
   register sets specific to FNDECL.  See also
   ix86_conditional_register_usage for more details.  */
1735void
1736ix86_call_abi_override (const_tree fndecl)
1737{
1738 cfun->machine->call_abi = ix86_function_abi (fndecl);
1739}
1740
/* Return true if a pseudo register should be created and used to hold
   the GOT address for PIC code.  */
1743bool
1744ix86_use_pseudo_pic_reg (void)
1745{
1746 if ((TARGET_64BIT
1747 && (ix86_cmodel == CM_SMALL_PIC
1748 || TARGET_PECOFF))
1749 || !flag_pic)
1750 return false;
1751 return true;
1752}
1753
1754/* Initialize large model PIC register. */
1755
1756static void
1757ix86_init_large_pic_reg (unsigned int tmp_regno)
1758{
1759 rtx_code_label *label;
1760 rtx tmp_reg;
1761
1762 gcc_assert (Pmode == DImode);
1763 label = gen_label_rtx ();
1764 emit_label (label);
1765 LABEL_PRESERVE_P (label) = 1;
1766 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1767 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1768 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1769 label));
1770 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1771 emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1772 const char *name = LABEL_NAME (label);
1773 PUT_CODE (label, NOTE);
1774 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1775 NOTE_DELETED_LABEL_NAME (label) = name;
1776}
1777
1778/* Create and initialize PIC register if required. */
1779static void
1780ix86_init_pic_reg (void)
1781{
1782 edge entry_edge;
1783 rtx_insn *seq;
1784
1785 if (!ix86_use_pseudo_pic_reg ())
1786 return;
1787
1788 start_sequence ();
1789
1790 if (TARGET_64BIT)
1791 {
1792 if (ix86_cmodel == CM_LARGE_PIC)
1793 ix86_init_large_pic_reg (R11_REG);
1794 else
1795 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1796 }
1797 else
1798 {
      /* If there will be an mcount call in the function, it is more
	 profitable to emit SET_GOT into the ABI-defined
	 REAL_PIC_OFFSET_TABLE_REGNUM.  */
1801 rtx reg = crtl->profile
1802 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1803 : pic_offset_table_rtx;
1804 rtx_insn *insn = emit_insn (gen_set_got (reg));
1805 RTX_FRAME_RELATED_P (insn) = 1;
1806 if (crtl->profile)
1807 emit_move_insn (pic_offset_table_rtx, reg);
1808 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1809 }
1810
1811 seq = end_sequence ();
1812
1813 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1814 insert_insn_on_edge (seq, entry_edge);
  commit_one_edge_insertion (entry_edge);
1816}
1817
1818/* Initialize a variable CUM of type CUMULATIVE_ARGS
1819 for a call to a function whose data type is FNTYPE.
1820 For a library call, FNTYPE is 0. */
1821
1822void
1823init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1824 tree fntype, /* tree ptr for function decl */
1825 rtx libname, /* SYMBOL_REF of library name or 0 */
1826 tree fndecl,
1827 int caller)
1828{
1829 struct cgraph_node *local_info_node = NULL;
1830 struct cgraph_node *target = NULL;
1831
1832 /* Set silent_p to false to raise an error for invalid calls when
1833 expanding function body. */
1834 cfun->machine->silent_p = false;
1835
  memset (cum, 0, sizeof (*cum));
1837
1838 if (fndecl)
1839 {
      target = cgraph_node::get (fndecl);
1841 if (target)
1842 {
1843 target = target->function_symbol ();
	  local_info_node = cgraph_node::local_info_node (target->decl);
	  cum->call_abi = ix86_function_abi (target->decl);
1846 }
1847 else
1848 cum->call_abi = ix86_function_abi (fndecl);
1849 }
1850 else
1851 cum->call_abi = ix86_function_type_abi (fntype);
1852
1853 cum->caller = caller;
1854
1855 /* Set up the number of registers to use for passing arguments. */
1856 cum->nregs = ix86_regparm;
1857 if (TARGET_64BIT)
1858 {
1859 cum->nregs = (cum->call_abi == SYSV_ABI
1860 ? X86_64_REGPARM_MAX
1861 : X86_64_MS_REGPARM_MAX);
1862 }
1863 if (TARGET_SSE)
1864 {
1865 cum->sse_nregs = SSE_REGPARM_MAX;
1866 if (TARGET_64BIT)
1867 {
1868 cum->sse_nregs = (cum->call_abi == SYSV_ABI
1869 ? X86_64_SSE_REGPARM_MAX
1870 : X86_64_MS_SSE_REGPARM_MAX);
1871 }
1872 }
1873 if (TARGET_MMX)
1874 cum->mmx_nregs = MMX_REGPARM_MAX;
1875 cum->warn_avx512f = true;
1876 cum->warn_avx = true;
1877 cum->warn_sse = true;
1878 cum->warn_mmx = true;
1879
  /* Because the type might mismatch between caller and callee, we need to
     use the actual type of the function for local calls.
     FIXME: cgraph_analyze can be told to actually record whether a function
     uses va_start, so for local functions maybe_vaarg could be made more
     aggressive, helping K&R code.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
1886 if (local_info_node && local_info_node->local
1887 && local_info_node->can_change_signature)
1888 fntype = TREE_TYPE (target->decl);
1889 cum->stdarg = stdarg_p (fntype);
1890 cum->maybe_vaarg = (fntype
1891 ? (!prototype_p (fntype) || stdarg_p (fntype))
1892 : !libname);
1893
1894 cum->decl = fndecl;
1895
1896 cum->warn_empty = !warn_abi || cum->stdarg;
1897 if (!cum->warn_empty && fntype)
1898 {
1899 function_args_iterator iter;
1900 tree argtype;
1901 bool seen_empty_type = false;
1902 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1903 {
1904 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1905 break;
1906 if (TYPE_EMPTY_P (argtype))
1907 seen_empty_type = true;
1908 else if (seen_empty_type)
1909 {
1910 cum->warn_empty = true;
1911 break;
1912 }
1913 }
1914 }
1915
1916 if (!TARGET_64BIT)
1917 {
1918 /* If there are variable arguments, then we won't pass anything
1919 in registers in 32-bit mode. */
1920 if (stdarg_p (fntype))
1921 {
1922 cum->nregs = 0;
	  /* Since in 32-bit mode variable arguments are always passed on
	     the stack, there is a scratch register available for an
	     indirect sibcall.  */
1926 cfun->machine->arg_reg_available = true;
1927 cum->sse_nregs = 0;
1928 cum->mmx_nregs = 0;
1929 cum->warn_avx512f = false;
1930 cum->warn_avx = false;
1931 cum->warn_sse = false;
1932 cum->warn_mmx = false;
1933 return;
1934 }
1935
1936 /* Use ecx and edx registers if function has fastcall attribute,
1937 else look for regparm information. */
1938 if (fntype)
1939 {
	  unsigned int ccvt = ix86_get_callcvt (fntype);
1941 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1942 {
1943 cum->nregs = 1;
1944 cum->fastcall = 1; /* Same first register as in fastcall. */
1945 }
1946 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1947 {
1948 cum->nregs = 2;
1949 cum->fastcall = 1;
1950 }
1951 else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
1953 }
1954
1955 /* Set up the number of SSE registers used for passing SFmode
1956 and DFmode arguments. Warn for mismatching ABI. */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1958 }
1959
1960 cfun->machine->arg_reg_available = (cum->nregs > 0);
1961}
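
/* Illustrative sketch (editorial addition): for 32-bit code a declaration
   such as

     __attribute__ ((fastcall)) int f (int a, int b);

   sets cum->nregs to 2 and cum->fastcall to 1 here, so A is later
   allocated to %ecx and B to %edx by function_arg_32, while a "thiscall"
   function gets a single register (%ecx) for its THIS pointer.  */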
1962
1963/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1964 But in the case of vector types, it is some vector mode.
1965
1966 When we have only some of our vector isa extensions enabled, then there
1967 are some modes for which vector_mode_supported_p is false. For these
1968 modes, the generic vector support in gcc will choose some non-vector mode
1969 in order to implement the type. By computing the natural mode, we'll
1970 select the proper ABI location for the operand and not depend on whatever
1971 the middle-end decides to do with these vector types.
1972
   The middle-end can't deal with vector types > 16 bytes.  In this
   case, we return the original mode and warn about the ABI change if
   CUM isn't NULL.

   If IN_RETURN is true, warn about the ABI change if the vector mode
   isn't available for the function return value.  */
1979
1980static machine_mode
1981type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1982 bool in_return)
1983{
1984 machine_mode mode = TYPE_MODE (type);
1985
1986 if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
1987 {
1988 HOST_WIDE_INT size = int_size_in_bytes (type);
1989 if ((size == 8 || size == 16 || size == 32 || size == 64)
1990 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
1992 {
1993 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1994
1995 /* There are no XFmode vector modes ... */
1996 if (innermode == XFmode)
1997 return mode;
1998
1999 /* ... and no decimal float vector modes. */
2000 if (DECIMAL_FLOAT_MODE_P (innermode))
2001 return mode;
2002
2003 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
2004 mode = MIN_MODE_VECTOR_FLOAT;
2005 else
2006 mode = MIN_MODE_VECTOR_INT;
2007
2008 /* Get the mode which has this inner mode and number of units. */
2009 FOR_EACH_MODE_FROM (mode, mode)
	if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2011 && GET_MODE_INNER (mode) == innermode)
2012 {
2013 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
2014 {
2015 static bool warnedavx512f;
2016 static bool warnedavx512f_ret;
2017
2018 if (cum && cum->warn_avx512f && !warnedavx512f)
2019 {
2020 if (warning (OPT_Wpsabi, "AVX512F vector argument "
2021 "without AVX512F enabled changes the ABI"))
2022 warnedavx512f = true;
2023 }
2024 else if (in_return && !warnedavx512f_ret)
2025 {
2026 if (warning (OPT_Wpsabi, "AVX512F vector return "
2027 "without AVX512F enabled changes the ABI"))
2028 warnedavx512f_ret = true;
2029 }
2030
2031 return TYPE_MODE (type);
2032 }
2033 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
2034 {
2035 static bool warnedavx;
2036 static bool warnedavx_ret;
2037
2038 if (cum && cum->warn_avx && !warnedavx)
2039 {
2040 if (warning (OPT_Wpsabi, "AVX vector argument "
2041 "without AVX enabled changes the ABI"))
2042 warnedavx = true;
2043 }
2044 else if (in_return && !warnedavx_ret)
2045 {
2046 if (warning (OPT_Wpsabi, "AVX vector return "
2047 "without AVX enabled changes the ABI"))
2048 warnedavx_ret = true;
2049 }
2050
2051 return TYPE_MODE (type);
2052 }
2053 else if (((size == 8 && TARGET_64BIT) || size == 16)
2054 && !TARGET_SSE
2055 && !TARGET_IAMCU)
2056 {
2057 static bool warnedsse;
2058 static bool warnedsse_ret;
2059
2060 if (cum && cum->warn_sse && !warnedsse)
2061 {
2062 if (warning (OPT_Wpsabi, "SSE vector argument "
2063 "without SSE enabled changes the ABI"))
2064 warnedsse = true;
2065 }
2066 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
2067 {
2068 if (warning (OPT_Wpsabi, "SSE vector return "
2069 "without SSE enabled changes the ABI"))
2070 warnedsse_ret = true;
2071 }
2072 }
2073 else if ((size == 8 && !TARGET_64BIT)
2074 && (!cfun
2075 || cfun->machine->func_type == TYPE_NORMAL)
2076 && !TARGET_MMX
2077 && !TARGET_IAMCU)
2078 {
2079 static bool warnedmmx;
2080 static bool warnedmmx_ret;
2081
2082 if (cum && cum->warn_mmx && !warnedmmx)
2083 {
2084 if (warning (OPT_Wpsabi, "MMX vector argument "
2085 "without MMX enabled changes the ABI"))
2086 warnedmmx = true;
2087 }
2088 else if (in_return && !warnedmmx_ret)
2089 {
2090 if (warning (OPT_Wpsabi, "MMX vector return "
2091 "without MMX enabled changes the ABI"))
2092 warnedmmx_ret = true;
2093 }
2094 }
2095 return mode;
2096 }
2097
2098 gcc_unreachable ();
2099 }
2100 }
2101
2102 return mode;
2103}
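
/* Illustrative example (editorial addition): for

     typedef int v8si __attribute__ ((vector_size (32)));

   type_natural_mode returns V8SImode when AVX is enabled, so the argument
   is passed in a single %ymm register.  Without AVX the function warns
   with -Wpsabi and falls back to TYPE_MODE, so the argument is passed in
   memory instead.  */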
2104
2105/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2106 this may not agree with the mode that the type system has chosen for the
2107 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2108 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2109
2110static rtx
2111gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
2112 unsigned int regno)
2113{
2114 rtx tmp;
2115
2116 if (orig_mode != BLKmode)
2117 tmp = gen_rtx_REG (orig_mode, regno);
2118 else
2119 {
2120 tmp = gen_rtx_REG (mode, regno);
2121 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2122 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2123 }
2124
2125 return tmp;
2126}
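
/* Illustrative sketch (editorial addition): for a V4SF argument with
   ORIG_MODE == V4SFmode this simply yields (reg:V4SF xmm0), while for a
   BLKmode aggregate it wraps the register in a single-element PARALLEL,
   roughly

     (parallel:BLK [(expr_list (reg:V4SF xmm0) (const_int 0))])

   so the middle end knows the value starts at byte offset 0.  */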
2127
/* x86-64 register passing implementation.  See the x86-64 psABI for
   details.  The goal of this code is to classify each eightbyte of an
   incoming argument by register class and assign registers accordingly.  */
2131
2132/* Return the union class of CLASS1 and CLASS2.
2133 See the x86-64 PS ABI for details. */
2134
2135static enum x86_64_reg_class
2136merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2137{
2138 /* Rule #1: If both classes are equal, this is the resulting class. */
2139 if (class1 == class2)
2140 return class1;
2141
2142 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2143 the other class. */
2144 if (class1 == X86_64_NO_CLASS)
2145 return class2;
2146 if (class2 == X86_64_NO_CLASS)
2147 return class1;
2148
2149 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2150 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2151 return X86_64_MEMORY_CLASS;
2152
2153 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2154 if ((class1 == X86_64_INTEGERSI_CLASS
2155 && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
2156 || (class2 == X86_64_INTEGERSI_CLASS
2157 && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
2158 return X86_64_INTEGERSI_CLASS;
2159 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2160 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2161 return X86_64_INTEGER_CLASS;
2162
2163 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2164 MEMORY is used. */
2165 if (class1 == X86_64_X87_CLASS
2166 || class1 == X86_64_X87UP_CLASS
2167 || class1 == X86_64_COMPLEX_X87_CLASS
2168 || class2 == X86_64_X87_CLASS
2169 || class2 == X86_64_X87UP_CLASS
2170 || class2 == X86_64_COMPLEX_X87_CLASS)
2171 return X86_64_MEMORY_CLASS;
2172
2173 /* Rule #6: Otherwise class SSE is used. */
2174 return X86_64_SSE_CLASS;
2175}
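
/* Worked examples (editorial addition) of the rules above:
     merge_classes (X86_64_NO_CLASS, X86_64_SSEDF_CLASS)
       == X86_64_SSEDF_CLASS			(rule #2)
     merge_classes (X86_64_INTEGER_CLASS, X86_64_SSE_CLASS)
       == X86_64_INTEGER_CLASS			(rule #4)
     merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)
       == X86_64_MEMORY_CLASS			(rule #5).  */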
2176
2177/* Classify the argument of type TYPE and mode MODE.
2178 CLASSES will be filled by the register class used to pass each word
2179 of the operand. The number of words is returned. In case the parameter
2180 should be passed in memory, 0 is returned. As a special case for zero
2181 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2182
   BIT_OFFSET is used internally for handling records and specifies the
   offset of the argument in bits modulo 512, to avoid overflow cases.

   See the x86-64 psABI for details.  */
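
/* Worked example (editorial addition, SysV x86-64 ABI): for

     struct s { double d; long l; };

   classify_argument fills classes[0] = X86_64_SSEDF_CLASS and
   classes[1] = X86_64_INTEGER_CLASS and returns 2, so the struct is
   passed in one SSE and one integer register.  A struct larger than
   64 bytes makes it return 0, i.e. the argument goes in memory.  */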
2188
2189static int
2190classify_argument (machine_mode mode, const_tree type,
2191 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
2192 int &zero_width_bitfields)
2193{
2194 HOST_WIDE_INT bytes
2195 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2196 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2197
2198 /* Variable sized entities are always passed/returned in memory. */
2199 if (bytes < 0)
2200 return 0;
2201
2202 if (mode != VOIDmode)
2203 {
2204 /* The value of "named" doesn't matter. */
2205 function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2206 if (targetm.calls.must_pass_in_stack (arg))
2207 return 0;
2208 }
2209
2210 if (type && (AGGREGATE_TYPE_P (type)
2211 || (TREE_CODE (type) == BITINT_TYPE && words > 1)))
2212 {
2213 int i;
2214 tree field;
2215 enum x86_64_reg_class subclasses[MAX_CLASSES];
2216
2217 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2218 if (bytes > 64)
2219 return 0;
2220
2221 for (i = 0; i < words; i++)
2222 classes[i] = X86_64_NO_CLASS;
2223
      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal the memory class, so handle them as a special case.  */
2226 if (!words)
2227 {
2228 classes[0] = X86_64_NO_CLASS;
2229 return 1;
2230 }
2231
2232 /* Classify each field of record and merge classes. */
2233 switch (TREE_CODE (type))
2234 {
2235 case RECORD_TYPE:
2236 /* And now merge the fields of structure. */
2237 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2238 {
2239 if (TREE_CODE (field) == FIELD_DECL)
2240 {
2241 int num;
2242
2243 if (TREE_TYPE (field) == error_mark_node)
2244 continue;
2245
2246 /* Bitfields are always classified as integer. Handle them
2247 early, since later code would consider them to be
2248 misaligned integers. */
2249 if (DECL_BIT_FIELD (field))
2250 {
2251 if (integer_zerop (DECL_SIZE (field)))
2252 {
2253 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
2254 continue;
2255 if (zero_width_bitfields != 2)
2256 {
2257 zero_width_bitfields = 1;
2258 continue;
2259 }
2260 }
2261 for (i = (int_bit_position (field)
2262 + (bit_offset % 64)) / 8 / 8;
2263 i < ((int_bit_position (field) + (bit_offset % 64))
2264 + tree_to_shwi (DECL_SIZE (field))
2265 + 63) / 8 / 8; i++)
		    classes[i]
		      = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2268 }
2269 else
2270 {
2271 int pos;
2272
2273 type = TREE_TYPE (field);
2274
2275 /* Flexible array member is ignored. */
2276 if (TYPE_MODE (type) == BLKmode
2277 && TREE_CODE (type) == ARRAY_TYPE
2278 && TYPE_SIZE (type) == NULL_TREE
2279 && TYPE_DOMAIN (type) != NULL_TREE
2280 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2281 == NULL_TREE))
2282 {
2283 static bool warned;
2284
2285 if (!warned && warn_psabi)
2286 {
2287 warned = true;
2288 inform (input_location,
2289 "the ABI of passing struct with"
2290 " a flexible array member has"
2291 " changed in GCC 4.4");
2292 }
2293 continue;
2294 }
		  num = classify_argument (TYPE_MODE (type), type,
					   subclasses,
					   (int_bit_position (field)
					    + bit_offset) % 512,
					   zero_width_bitfields);
2300 if (!num)
2301 return 0;
2302 pos = (int_bit_position (field)
2303 + (bit_offset % 64)) / 8 / 8;
2304 for (i = 0; i < num && (i + pos) < words; i++)
		    classes[i + pos]
		      = merge_classes (subclasses[i], classes[i + pos]);
2307 }
2308 }
2309 }
2310 break;
2311
2312 case ARRAY_TYPE:
2313 /* Arrays are handled as small records. */
2314 {
2315 int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset,
				     zero_width_bitfields);
2319 if (!num)
2320 return 0;
2321
2322 /* The partial classes are now full classes. */
2323 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2324 subclasses[0] = X86_64_SSE_CLASS;
2325 if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
2326 subclasses[0] = X86_64_SSE_CLASS;
2327 if (subclasses[0] == X86_64_INTEGERSI_CLASS
2328 && !((bit_offset % 64) == 0 && bytes == 4))
2329 subclasses[0] = X86_64_INTEGER_CLASS;
2330
2331 for (i = 0; i < words; i++)
2332 classes[i] = subclasses[i % num];
2333
2334 break;
2335 }
2336 case UNION_TYPE:
2337 case QUAL_UNION_TYPE:
	/* Unions are similar to RECORD_TYPE but the offset is always 0.  */
2340 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2341 {
2342 if (TREE_CODE (field) == FIELD_DECL)
2343 {
2344 int num;
2345
2346 if (TREE_TYPE (field) == error_mark_node)
2347 continue;
2348
		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset, zero_width_bitfields);
2352 if (!num)
2353 return 0;
2354 for (i = 0; i < num && i < words; i++)
		classes[i] = merge_classes (subclasses[i], classes[i]);
2356 }
2357 }
2358 break;
2359
2360 case BITINT_TYPE:
	  /* _BitInt(N) for N > 64 is passed as a structure containing
	     (N + 63) / 64 64-bit elements.  */
2363 if (words > 2)
2364 return 0;
2365 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2366 return 2;
2367
2368 default:
2369 gcc_unreachable ();
2370 }
2371
2372 if (words > 2)
2373 {
	  /* When the size exceeds 16 bytes, everything is passed in
	     memory unless the first class is X86_64_SSE_CLASS and all
	     the remaining classes are X86_64_SSEUP_CLASS.  */
2378 if (classes[0] != X86_64_SSE_CLASS)
2379 return 0;
2380
2381 for (i = 1; i < words; i++)
2382 if (classes[i] != X86_64_SSEUP_CLASS)
2383 return 0;
2384 }
2385
2386 /* Final merger cleanup. */
2387 for (i = 0; i < words; i++)
2388 {
2389 /* If one class is MEMORY, everything should be passed in
2390 memory. */
2391 if (classes[i] == X86_64_MEMORY_CLASS)
2392 return 0;
2393
	 /* X86_64_SSEUP_CLASS should always be preceded by
	    X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
2396 if (classes[i] == X86_64_SSEUP_CLASS
2397 && classes[i - 1] != X86_64_SSE_CLASS
2398 && classes[i - 1] != X86_64_SSEUP_CLASS)
2399 {
2400 /* The first one should never be X86_64_SSEUP_CLASS. */
2401 gcc_assert (i != 0);
2402 classes[i] = X86_64_SSE_CLASS;
2403 }
2404
2405 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2406 everything should be passed in memory. */
2407 if (classes[i] == X86_64_X87UP_CLASS
2408 && (classes[i - 1] != X86_64_X87_CLASS))
2409 {
2410 static bool warned;
2411
2412 /* The first one should never be X86_64_X87UP_CLASS. */
2413 gcc_assert (i != 0);
2414 if (!warned && warn_psabi)
2415 {
2416 warned = true;
2417 inform (input_location,
2418 "the ABI of passing union with %<long double%>"
2419 " has changed in GCC 4.4");
2420 }
2421 return 0;
2422 }
2423 }
2424 return words;
2425 }
2426
  /* Compute the alignment needed.  We align all types to their natural
     boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
2429 if (mode != VOIDmode && mode != BLKmode)
2430 {
2431 int mode_alignment = GET_MODE_BITSIZE (mode);
2432
2433 if (mode == XFmode)
2434 mode_alignment = 128;
2435 else if (mode == XCmode)
2436 mode_alignment = 256;
2437 if (COMPLEX_MODE_P (mode))
2438 mode_alignment /= 2;
2439 /* Misaligned fields are always returned in memory. */
2440 if (bit_offset % mode_alignment)
2441 return 0;
2442 }
2443
  /* For V1xx modes, just use the base mode.  */
2445 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2446 && GET_MODE_UNIT_SIZE (mode) == bytes)
2447 mode = GET_MODE_INNER (mode);
2448
2449 /* Classification of atomic types. */
2450 switch (mode)
2451 {
2452 case E_SDmode:
2453 case E_DDmode:
2454 classes[0] = X86_64_SSE_CLASS;
2455 return 1;
2456 case E_TDmode:
2457 classes[0] = X86_64_SSE_CLASS;
2458 classes[1] = X86_64_SSEUP_CLASS;
2459 return 2;
2460 case E_DImode:
2461 case E_SImode:
2462 case E_HImode:
2463 case E_QImode:
2464 case E_CSImode:
2465 case E_CHImode:
2466 case E_CQImode:
2467 {
2468 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2469
2470 /* Analyze last 128 bits only. */
2471 size = (size - 1) & 0x7f;
2472
2473 if (size < 32)
2474 {
2475 classes[0] = X86_64_INTEGERSI_CLASS;
2476 return 1;
2477 }
2478 else if (size < 64)
2479 {
2480 classes[0] = X86_64_INTEGER_CLASS;
2481 return 1;
2482 }
2483 else if (size < 64+32)
2484 {
2485 classes[0] = X86_64_INTEGER_CLASS;
2486 classes[1] = X86_64_INTEGERSI_CLASS;
2487 return 2;
2488 }
2489 else if (size < 64+64)
2490 {
2491 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2492 return 2;
2493 }
2494 else
2495 gcc_unreachable ();
2496 }
2497 case E_CDImode:
2498 case E_TImode:
2499 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2500 return 2;
2501 case E_COImode:
2502 case E_OImode:
2503 /* OImode shouldn't be used directly. */
2504 gcc_unreachable ();
2505 case E_CTImode:
2506 return 0;
2507 case E_HFmode:
2508 case E_BFmode:
2509 if (!(bit_offset % 64))
2510 classes[0] = X86_64_SSEHF_CLASS;
2511 else
2512 classes[0] = X86_64_SSE_CLASS;
2513 return 1;
2514 case E_SFmode:
2515 if (!(bit_offset % 64))
2516 classes[0] = X86_64_SSESF_CLASS;
2517 else
2518 classes[0] = X86_64_SSE_CLASS;
2519 return 1;
2520 case E_DFmode:
2521 classes[0] = X86_64_SSEDF_CLASS;
2522 return 1;
2523 case E_XFmode:
2524 classes[0] = X86_64_X87_CLASS;
2525 classes[1] = X86_64_X87UP_CLASS;
2526 return 2;
2527 case E_TFmode:
2528 classes[0] = X86_64_SSE_CLASS;
2529 classes[1] = X86_64_SSEUP_CLASS;
2530 return 2;
2531 case E_HCmode:
2532 case E_BCmode:
2533 classes[0] = X86_64_SSE_CLASS;
2534 if (!(bit_offset % 64))
2535 return 1;
2536 else
2537 {
2538 classes[1] = X86_64_SSEHF_CLASS;
2539 return 2;
2540 }
2541 case E_SCmode:
2542 classes[0] = X86_64_SSE_CLASS;
2543 if (!(bit_offset % 64))
2544 return 1;
2545 else
2546 {
2547 static bool warned;
2548
2549 if (!warned && warn_psabi)
2550 {
2551 warned = true;
2552 inform (input_location,
2553 "the ABI of passing structure with %<complex float%>"
2554 " member has changed in GCC 4.4");
2555 }
2556 classes[1] = X86_64_SSESF_CLASS;
2557 return 2;
2558 }
2559 case E_DCmode:
2560 classes[0] = X86_64_SSEDF_CLASS;
2561 classes[1] = X86_64_SSEDF_CLASS;
2562 return 2;
2563 case E_XCmode:
2564 classes[0] = X86_64_COMPLEX_X87_CLASS;
2565 return 1;
2566 case E_TCmode:
      /* This mode is larger than 16 bytes.  */
2568 return 0;
2569 case E_V8SFmode:
2570 case E_V8SImode:
2571 case E_V32QImode:
2572 case E_V16HFmode:
2573 case E_V16BFmode:
2574 case E_V16HImode:
2575 case E_V4DFmode:
2576 case E_V4DImode:
2577 classes[0] = X86_64_SSE_CLASS;
2578 classes[1] = X86_64_SSEUP_CLASS;
2579 classes[2] = X86_64_SSEUP_CLASS;
2580 classes[3] = X86_64_SSEUP_CLASS;
2581 return 4;
2582 case E_V8DFmode:
2583 case E_V16SFmode:
2584 case E_V32HFmode:
2585 case E_V32BFmode:
2586 case E_V8DImode:
2587 case E_V16SImode:
2588 case E_V32HImode:
2589 case E_V64QImode:
2590 classes[0] = X86_64_SSE_CLASS;
2591 classes[1] = X86_64_SSEUP_CLASS;
2592 classes[2] = X86_64_SSEUP_CLASS;
2593 classes[3] = X86_64_SSEUP_CLASS;
2594 classes[4] = X86_64_SSEUP_CLASS;
2595 classes[5] = X86_64_SSEUP_CLASS;
2596 classes[6] = X86_64_SSEUP_CLASS;
2597 classes[7] = X86_64_SSEUP_CLASS;
2598 return 8;
2599 case E_V4SFmode:
2600 case E_V4SImode:
2601 case E_V16QImode:
2602 case E_V8HImode:
2603 case E_V8HFmode:
2604 case E_V8BFmode:
2605 case E_V2DFmode:
2606 case E_V2DImode:
2607 classes[0] = X86_64_SSE_CLASS;
2608 classes[1] = X86_64_SSEUP_CLASS;
2609 return 2;
2610 case E_V1TImode:
2611 case E_V1DImode:
2612 case E_V2SFmode:
2613 case E_V2SImode:
2614 case E_V4HImode:
2615 case E_V4HFmode:
2616 case E_V4BFmode:
2617 case E_V2HFmode:
2618 case E_V2BFmode:
2619 case E_V8QImode:
2620 classes[0] = X86_64_SSE_CLASS;
2621 return 1;
2622 case E_BLKmode:
2623 case E_VOIDmode:
2624 return 0;
2625 default:
2626 gcc_assert (VECTOR_MODE_P (mode));
2627
2628 if (bytes > 16)
2629 return 0;
2630
2631 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2632
2633 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2634 classes[0] = X86_64_INTEGERSI_CLASS;
2635 else
2636 classes[0] = X86_64_INTEGER_CLASS;
2637 classes[1] = X86_64_INTEGER_CLASS;
2638 return 1 + (bytes > 8);
2639 }
2640}
2641
2642/* Wrapper around classify_argument with the extra zero_width_bitfields
2643 argument, to diagnose GCC 12.1 ABI differences for C. */
2644
2645static int
2646classify_argument (machine_mode mode, const_tree type,
2647 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2648{
2649 int zero_width_bitfields = 0;
2650 static bool warned = false;
2651 int n = classify_argument (mode, type, classes, bit_offset,
2652 zero_width_bitfields);
2653 if (!zero_width_bitfields || warned || !warn_psabi)
2654 return n;
2655 enum x86_64_reg_class alt_classes[MAX_CLASSES];
2656 zero_width_bitfields = 2;
  if (classify_argument (mode, type, alt_classes, bit_offset,
2658 zero_width_bitfields) != n)
2659 zero_width_bitfields = 3;
2660 else
2661 for (int i = 0; i < n; i++)
2662 if (classes[i] != alt_classes[i])
2663 {
2664 zero_width_bitfields = 3;
2665 break;
2666 }
2667 if (zero_width_bitfields == 3)
2668 {
2669 warned = true;
2670 const char *url
2671 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
2672
2673 inform (input_location,
2674 "the ABI of passing C structures with zero-width bit-fields"
2675 " has changed in GCC %{12.1%}", url);
2676 }
2677 return n;
2678}
2679
/* Examine the argument and set the number of registers required in each
   class.  Return true iff the parameter should be passed in memory.  */
2682
2683static bool
2684examine_argument (machine_mode mode, const_tree type, int in_return,
2685 int *int_nregs, int *sse_nregs)
2686{
2687 enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);
2689
2690 *int_nregs = 0;
2691 *sse_nregs = 0;
2692
2693 if (!n)
2694 return true;
2695 for (n--; n >= 0; n--)
2696 switch (regclass[n])
2697 {
2698 case X86_64_INTEGER_CLASS:
2699 case X86_64_INTEGERSI_CLASS:
2700 (*int_nregs)++;
2701 break;
2702 case X86_64_SSE_CLASS:
2703 case X86_64_SSEHF_CLASS:
2704 case X86_64_SSESF_CLASS:
2705 case X86_64_SSEDF_CLASS:
2706 (*sse_nregs)++;
2707 break;
2708 case X86_64_NO_CLASS:
2709 case X86_64_SSEUP_CLASS:
2710 break;
2711 case X86_64_X87_CLASS:
2712 case X86_64_X87UP_CLASS:
2713 case X86_64_COMPLEX_X87_CLASS:
2714 if (!in_return)
2715 return true;
2716 break;
2717 case X86_64_MEMORY_CLASS:
2718 gcc_unreachable ();
2719 }
2720
2721 return false;
2722}
2723
2724/* Construct container for the argument used by GCC interface. See
2725 FUNCTION_ARG for the detailed description. */
2726
2727static rtx
2728construct_container (machine_mode mode, machine_mode orig_mode,
2729 const_tree type, int in_return, int nintregs, int nsseregs,
2730 const int *intreg, int sse_regno)
2731{
2732 /* The following variables hold the static issued_error state. */
2733 static bool issued_sse_arg_error;
2734 static bool issued_sse_ret_error;
2735 static bool issued_x87_ret_error;
2736
2737 machine_mode tmpmode;
2738 int bytes
2739 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2740 enum x86_64_reg_class regclass[MAX_CLASSES];
2741 int n;
2742 int i;
2743 int nexps = 0;
2744 int needed_sseregs, needed_intregs;
2745 rtx exp[MAX_CLASSES];
2746 rtx ret;
2747
  n = classify_argument (mode, type, regclass, 0);
2749 if (!n)
2750 return NULL;
  if (examine_argument (mode, type, in_return, &needed_intregs,
			&needed_sseregs))
2753 return NULL;
2754 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2755 return NULL;
2756
2757 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2758 some less clueful developer tries to use floating-point anyway. */
2759 if (needed_sseregs
2760 && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
2761 {
2762 /* Return early if we shouldn't raise an error for invalid
2763 calls. */
2764 if (cfun != NULL && cfun->machine->silent_p)
2765 return NULL;
2766 if (in_return)
2767 {
2768 if (!issued_sse_ret_error)
2769 {
2770 if (VALID_SSE2_TYPE_MODE (mode))
2771 error ("SSE register return with SSE2 disabled");
2772 else
2773 error ("SSE register return with SSE disabled");
2774 issued_sse_ret_error = true;
2775 }
2776 }
2777 else if (!issued_sse_arg_error)
2778 {
2779 if (VALID_SSE2_TYPE_MODE (mode))
2780 error ("SSE register argument with SSE2 disabled");
2781 else
2782 error ("SSE register argument with SSE disabled");
2783 issued_sse_arg_error = true;
2784 }
2785 return NULL;
2786 }
2787
2788 /* Likewise, error if the ABI requires us to return values in the
2789 x87 registers and the user specified -mno-80387. */
2790 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2791 for (i = 0; i < n; i++)
2792 if (regclass[i] == X86_64_X87_CLASS
2793 || regclass[i] == X86_64_X87UP_CLASS
2794 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2795 {
2796 /* Return early if we shouldn't raise an error for invalid
2797 calls. */
2798 if (cfun != NULL && cfun->machine->silent_p)
2799 return NULL;
2800 if (!issued_x87_ret_error)
2801 {
2802 error ("x87 register return with x87 disabled");
2803 issued_x87_ret_error = true;
2804 }
2805 return NULL;
2806 }
2807
  /* First construct simple cases.  Avoid SCmode, since we want to use
     a single register to pass this type.  */
2810 if (n == 1 && mode != SCmode && mode != HCmode)
2811 switch (regclass[0])
2812 {
2813 case X86_64_INTEGER_CLASS:
2814 case X86_64_INTEGERSI_CLASS:
2815 return gen_rtx_REG (mode, intreg[0]);
2816 case X86_64_SSE_CLASS:
2817 case X86_64_SSEHF_CLASS:
2818 case X86_64_SSESF_CLASS:
2819 case X86_64_SSEDF_CLASS:
2820 if (mode != BLKmode)
2821 return gen_reg_or_parallel (mode, orig_mode,
2822 GET_SSE_REGNO (sse_regno));
2823 break;
2824 case X86_64_X87_CLASS:
2825 case X86_64_COMPLEX_X87_CLASS:
2826 return gen_rtx_REG (mode, FIRST_STACK_REG);
2827 case X86_64_NO_CLASS:
2828 /* Zero sized array, struct or class. */
2829 return NULL;
2830 default:
2831 gcc_unreachable ();
2832 }
2833 if (n == 2
2834 && regclass[0] == X86_64_SSE_CLASS
2835 && regclass[1] == X86_64_SSEUP_CLASS
2836 && mode != BLKmode)
2837 return gen_reg_or_parallel (mode, orig_mode,
2838 GET_SSE_REGNO (sse_regno));
2839 if (n == 4
2840 && regclass[0] == X86_64_SSE_CLASS
2841 && regclass[1] == X86_64_SSEUP_CLASS
2842 && regclass[2] == X86_64_SSEUP_CLASS
2843 && regclass[3] == X86_64_SSEUP_CLASS
2844 && mode != BLKmode)
2845 return gen_reg_or_parallel (mode, orig_mode,
2846 GET_SSE_REGNO (sse_regno));
2847 if (n == 8
2848 && regclass[0] == X86_64_SSE_CLASS
2849 && regclass[1] == X86_64_SSEUP_CLASS
2850 && regclass[2] == X86_64_SSEUP_CLASS
2851 && regclass[3] == X86_64_SSEUP_CLASS
2852 && regclass[4] == X86_64_SSEUP_CLASS
2853 && regclass[5] == X86_64_SSEUP_CLASS
2854 && regclass[6] == X86_64_SSEUP_CLASS
2855 && regclass[7] == X86_64_SSEUP_CLASS
2856 && mode != BLKmode)
2857 return gen_reg_or_parallel (mode, orig_mode,
2858 GET_SSE_REGNO (sse_regno));
2859 if (n == 2
2860 && regclass[0] == X86_64_X87_CLASS
2861 && regclass[1] == X86_64_X87UP_CLASS)
2862 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2863
2864 if (n == 2
2865 && regclass[0] == X86_64_INTEGER_CLASS
2866 && regclass[1] == X86_64_INTEGER_CLASS
2867 && (mode == CDImode || mode == TImode || mode == BLKmode)
2868 && intreg[0] + 1 == intreg[1])
2869 {
2870 if (mode == BLKmode)
2871 {
2872 /* Use TImode for BLKmode values in 2 integer registers. */
2873 exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2874 gen_rtx_REG (TImode, intreg[0]),
2875 GEN_INT (0));
2876 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2877 XVECEXP (ret, 0, 0) = exp[0];
2878 return ret;
2879 }
2880 else
2881 return gen_rtx_REG (mode, intreg[0]);
2882 }
2883
2884 /* Otherwise figure out the entries of the PARALLEL. */
2885 for (i = 0; i < n; i++)
2886 {
2887 int pos;
2888
2889 switch (regclass[i])
2890 {
2891 case X86_64_NO_CLASS:
2892 break;
2893 case X86_64_INTEGER_CLASS:
2894 case X86_64_INTEGERSI_CLASS:
2895 /* Merge TImodes on aligned occasions here too. */
2896 if (i * 8 + 8 > bytes)
2897 {
2898 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
	      if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
		/* We've requested 24 bytes for which we don't have
		   a mode.  Use DImode.  */
2902 tmpmode = DImode;
2903 }
2904 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2905 tmpmode = SImode;
2906 else
2907 tmpmode = DImode;
2908 exp [nexps++]
2909 = gen_rtx_EXPR_LIST (VOIDmode,
2910 gen_rtx_REG (tmpmode, *intreg),
2911 GEN_INT (i*8));
2912 intreg++;
2913 break;
2914 case X86_64_SSEHF_CLASS:
2915 tmpmode = (mode == BFmode ? BFmode : HFmode);
2916 exp [nexps++]
2917 = gen_rtx_EXPR_LIST (VOIDmode,
2918 gen_rtx_REG (tmpmode,
2919 GET_SSE_REGNO (sse_regno)),
2920 GEN_INT (i*8));
2921 sse_regno++;
2922 break;
2923 case X86_64_SSESF_CLASS:
2924 exp [nexps++]
2925 = gen_rtx_EXPR_LIST (VOIDmode,
2926 gen_rtx_REG (SFmode,
2927 GET_SSE_REGNO (sse_regno)),
2928 GEN_INT (i*8));
2929 sse_regno++;
2930 break;
2931 case X86_64_SSEDF_CLASS:
2932 exp [nexps++]
2933 = gen_rtx_EXPR_LIST (VOIDmode,
2934 gen_rtx_REG (DFmode,
2935 GET_SSE_REGNO (sse_regno)),
2936 GEN_INT (i*8));
2937 sse_regno++;
2938 break;
2939 case X86_64_SSE_CLASS:
2940 pos = i;
2941 switch (n)
2942 {
2943 case 1:
2944 tmpmode = DImode;
2945 break;
2946 case 2:
2947 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2948 {
2949 tmpmode = TImode;
2950 i++;
2951 }
2952 else
2953 tmpmode = DImode;
2954 break;
2955 case 4:
2956 gcc_assert (i == 0
2957 && regclass[1] == X86_64_SSEUP_CLASS
2958 && regclass[2] == X86_64_SSEUP_CLASS
2959 && regclass[3] == X86_64_SSEUP_CLASS);
2960 tmpmode = OImode;
2961 i += 3;
2962 break;
2963 case 8:
2964 gcc_assert (i == 0
2965 && regclass[1] == X86_64_SSEUP_CLASS
2966 && regclass[2] == X86_64_SSEUP_CLASS
2967 && regclass[3] == X86_64_SSEUP_CLASS
2968 && regclass[4] == X86_64_SSEUP_CLASS
2969 && regclass[5] == X86_64_SSEUP_CLASS
2970 && regclass[6] == X86_64_SSEUP_CLASS
2971 && regclass[7] == X86_64_SSEUP_CLASS);
2972 tmpmode = XImode;
2973 i += 7;
2974 break;
2975 default:
2976 gcc_unreachable ();
2977 }
2978 exp [nexps++]
2979 = gen_rtx_EXPR_LIST (VOIDmode,
2980 gen_rtx_REG (tmpmode,
2981 GET_SSE_REGNO (sse_regno)),
2982 GEN_INT (pos*8));
2983 sse_regno++;
2984 break;
2985 default:
2986 gcc_unreachable ();
2987 }
2988 }
2989
2990 /* Empty aligned struct, union or class. */
2991 if (nexps == 0)
2992 return NULL;
2993
2994 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2995 for (i = 0; i < nexps; i++)
2996 XVECEXP (ret, 0, i) = exp [i];
2997 return ret;
2998}
2999
3000/* Update the data in CUM to advance over an argument of mode MODE
3001 and data type TYPE. (TYPE is null for libcalls where that information
3002 may not be available.)
3003
   Return the number of integer registers advanced over.  */
3005
3006static int
3007function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
3008 const_tree type, HOST_WIDE_INT bytes,
3009 HOST_WIDE_INT words)
3010{
3011 int res = 0;
3012 bool error_p = false;
3013
3014 if (TARGET_IAMCU)
3015 {
3016 /* Intel MCU psABI passes scalars and aggregates no larger than 8
3017 bytes in registers. */
3018 if (!VECTOR_MODE_P (mode) && bytes <= 8)
3019 goto pass_in_reg;
3020 return res;
3021 }
3022
3023 switch (mode)
3024 {
3025 default:
3026 break;
3027
3028 case E_BLKmode:
3029 if (bytes < 0)
3030 break;
3031 /* FALLTHRU */
3032
3033 case E_DImode:
3034 case E_SImode:
3035 case E_HImode:
3036 case E_QImode:
3037pass_in_reg:
3038 cum->words += words;
3039 cum->nregs -= words;
3040 cum->regno += words;
3041 if (cum->nregs >= 0)
3042 res = words;
3043 if (cum->nregs <= 0)
3044 {
3045 cum->nregs = 0;
3046 cfun->machine->arg_reg_available = false;
3047 cum->regno = 0;
3048 }
3049 break;
3050
3051 case E_OImode:
3052 /* OImode shouldn't be used directly. */
3053 gcc_unreachable ();
3054
3055 case E_DFmode:
3056 if (cum->float_in_sse == -1)
3057 error_p = true;
3058 if (cum->float_in_sse < 2)
3059 break;
3060 /* FALLTHRU */
3061 case E_SFmode:
3062 if (cum->float_in_sse == -1)
3063 error_p = true;
3064 if (cum->float_in_sse < 1)
3065 break;
3066 /* FALLTHRU */
3067
3068 case E_V16HFmode:
3069 case E_V16BFmode:
3070 case E_V8SFmode:
3071 case E_V8SImode:
3072 case E_V64QImode:
3073 case E_V32HImode:
3074 case E_V16SImode:
3075 case E_V8DImode:
3076 case E_V32HFmode:
3077 case E_V32BFmode:
3078 case E_V16SFmode:
3079 case E_V8DFmode:
3080 case E_V32QImode:
3081 case E_V16HImode:
3082 case E_V4DFmode:
3083 case E_V4DImode:
3084 case E_TImode:
3085 case E_V16QImode:
3086 case E_V8HImode:
3087 case E_V4SImode:
3088 case E_V2DImode:
3089 case E_V8HFmode:
3090 case E_V8BFmode:
3091 case E_V4SFmode:
3092 case E_V2DFmode:
3093 if (!type || !AGGREGATE_TYPE_P (type))
3094 {
3095 cum->sse_words += words;
3096 cum->sse_nregs -= 1;
3097 cum->sse_regno += 1;
3098 if (cum->sse_nregs <= 0)
3099 {
3100 cum->sse_nregs = 0;
3101 cum->sse_regno = 0;
3102 }
3103 }
3104 break;
3105
3106 case E_V8QImode:
3107 case E_V4HImode:
3108 case E_V4HFmode:
3109 case E_V4BFmode:
3110 case E_V2SImode:
3111 case E_V2SFmode:
3112 case E_V1TImode:
3113 case E_V1DImode:
3114 if (!type || !AGGREGATE_TYPE_P (type))
3115 {
3116 cum->mmx_words += words;
3117 cum->mmx_nregs -= 1;
3118 cum->mmx_regno += 1;
3119 if (cum->mmx_nregs <= 0)
3120 {
3121 cum->mmx_nregs = 0;
3122 cum->mmx_regno = 0;
3123 }
3124 }
3125 break;
3126 }
3127 if (error_p)
3128 {
3129 cum->float_in_sse = 0;
3130 error ("calling %qD with SSE calling convention without "
3131 "SSE/SSE2 enabled", cum->decl);
3132 sorry ("this is a GCC bug that can be worked around by adding "
3133 "attribute used to function called");
3134 }
3135
3136 return res;
3137}
3138
3139static int
3140function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
3141 const_tree type, HOST_WIDE_INT words, bool named)
3142{
3143 int int_nregs, sse_nregs;
3144
3145 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
3146 if (!named && (VALID_AVX512F_REG_MODE (mode)
3147 || VALID_AVX256_REG_MODE (mode)))
3148 return 0;
3149
  if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
3151 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3152 {
3153 cum->nregs -= int_nregs;
3154 cum->sse_nregs -= sse_nregs;
3155 cum->regno += int_nregs;
3156 cum->sse_regno += sse_nregs;
3157 return int_nregs;
3158 }
3159 else
3160 {
3161 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
3162 cum->words = ROUND_UP (cum->words, align);
3163 cum->words += words;
3164 return 0;
3165 }
3166}
3167
3168static int
3169function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3170 HOST_WIDE_INT words)
3171{
3172 /* Otherwise, this should be passed indirect. */
3173 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3174
3175 cum->words += words;
3176 if (cum->nregs > 0)
3177 {
3178 cum->nregs -= 1;
3179 cum->regno += 1;
3180 return 1;
3181 }
3182 return 0;
3183}
3184
3185/* Update the data in CUM to advance over argument ARG. */
3186
3187static void
3188ix86_function_arg_advance (cumulative_args_t cum_v,
3189 const function_arg_info &arg)
3190{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3192 machine_mode mode = arg.mode;
3193 HOST_WIDE_INT bytes, words;
3194 int nregs;
3195
3196 /* The argument of interrupt handler is a special case and is
3197 handled in ix86_function_arg. */
3198 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3199 return;
3200
3201 bytes = arg.promoted_size_in_bytes ();
3202 words = CEIL (bytes, UNITS_PER_WORD);
3203
3204 if (arg.type)
    mode = type_natural_mode (arg.type, NULL, false);
3206
3207 if (TARGET_64BIT)
3208 {
3209 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3210
3211 if (call_abi == MS_ABI)
3212 nregs = function_arg_advance_ms_64 (cum, bytes, words);
3213 else
	nregs = function_arg_advance_64 (cum, mode, arg.type, words,
					 arg.named);
3216 }
3217 else
    nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
3219
3220 if (!nregs)
3221 {
3222 /* Track if there are outgoing arguments on stack. */
3223 if (cum->caller)
3224 cfun->machine->outgoing_args_on_stack = true;
3225 }
3226}
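
/* Illustrative sketch (editorial addition): for a SysV x86-64 call to

     void f (int i, double d);

   the advance for I consumes one integer register (cum->nregs 6 -> 5,
   cum->regno 0 -> 1) and the advance for D consumes one SSE register
   (cum->sse_nregs 8 -> 7), leaving cum->words untouched because nothing
   was pushed on the stack.  */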
3227
3228/* Define where to put the arguments to a function.
3229 Value is zero to push the argument on the stack,
3230 or a hard register in which to store the argument.
3231
3232 MODE is the argument's machine mode.
3233 TYPE is the data type of the argument (as a tree).
3234 This is null for libcalls where that information may
3235 not be available.
3236 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3237 the preceding args and about the function being called.
3238 NAMED is nonzero if this argument is a named parameter
3239 (otherwise it is an extra parameter matching an ellipsis). */
3240
3241static rtx
3242function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
3243 machine_mode orig_mode, const_tree type,
3244 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3245{
3246 bool error_p = false;
3247
3248 /* Avoid the AL settings for the Unix64 ABI. */
3249 if (mode == VOIDmode)
3250 return constm1_rtx;
3251
3252 if (TARGET_IAMCU)
3253 {
3254 /* Intel MCU psABI passes scalars and aggregates no larger than 8
3255 bytes in registers. */
3256 if (!VECTOR_MODE_P (mode) && bytes <= 8)
3257 goto pass_in_reg;
3258 return NULL_RTX;
3259 }
3260
3261 switch (mode)
3262 {
3263 default:
3264 break;
3265
3266 case E_BLKmode:
3267 if (bytes < 0)
3268 break;
3269 /* FALLTHRU */
3270 case E_DImode:
3271 case E_SImode:
3272 case E_HImode:
3273 case E_QImode:
3274pass_in_reg:
3275 if (words <= cum->nregs)
3276 {
3277 int regno = cum->regno;
3278
	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX, unless the argument is
	     an aggregate type.  */
3282 if (cum->fastcall)
3283 {
3284 if (mode == BLKmode
3285 || mode == DImode
3286 || (type && AGGREGATE_TYPE_P (type)))
3287 break;
3288
	      /* ECX, not EAX, is the first allocated register.  */
3290 if (regno == AX_REG)
3291 regno = CX_REG;
3292 }
3293 return gen_rtx_REG (mode, regno);
3294 }
3295 break;
3296
3297 case E_DFmode:
3298 if (cum->float_in_sse == -1)
3299 error_p = true;
3300 if (cum->float_in_sse < 2)
3301 break;
3302 /* FALLTHRU */
3303 case E_SFmode:
3304 if (cum->float_in_sse == -1)
3305 error_p = true;
3306 if (cum->float_in_sse < 1)
3307 break;
3308 /* FALLTHRU */
3309 case E_TImode:
3310 /* In 32bit, we pass TImode in xmm registers. */
3311 case E_V16QImode:
3312 case E_V8HImode:
3313 case E_V4SImode:
3314 case E_V2DImode:
3315 case E_V8HFmode:
3316 case E_V8BFmode:
3317 case E_V4SFmode:
3318 case E_V2DFmode:
3319 if (!type || !AGGREGATE_TYPE_P (type))
3320 {
3321 if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
3324 }
3325 break;
3326
3327 case E_OImode:
3328 case E_XImode:
3329 /* OImode and XImode shouldn't be used directly. */
3330 gcc_unreachable ();
3331
3332 case E_V64QImode:
3333 case E_V32HImode:
3334 case E_V16SImode:
3335 case E_V8DImode:
3336 case E_V32HFmode:
3337 case E_V32BFmode:
3338 case E_V16SFmode:
3339 case E_V8DFmode:
3340 case E_V16HFmode:
3341 case E_V16BFmode:
3342 case E_V8SFmode:
3343 case E_V8SImode:
3344 case E_V32QImode:
3345 case E_V16HImode:
3346 case E_V4DFmode:
3347 case E_V4DImode:
3348 if (!type || !AGGREGATE_TYPE_P (type))
3349 {
3350 if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
3353 }
3354 break;
3355
3356 case E_V8QImode:
3357 case E_V4HImode:
3358 case E_V4HFmode:
3359 case E_V4BFmode:
3360 case E_V2SImode:
3361 case E_V2SFmode:
3362 case E_V1TImode:
3363 case E_V1DImode:
3364 if (!type || !AGGREGATE_TYPE_P (type))
3365 {
3366 if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
3369 }
3370 break;
3371 }
3372 if (error_p)
3373 {
3374 cum->float_in_sse = 0;
3375 error ("calling %qD with SSE calling convention without "
3376 "SSE/SSE2 enabled", cum->decl);
3377 sorry ("this is a GCC bug that can be worked around by adding "
3378 "attribute used to function called");
3379 }
3380
3381 return NULL_RTX;
3382}
3383
3384static rtx
3385function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3386 machine_mode orig_mode, const_tree type, bool named)
3387{
3388 /* Handle a hidden AL argument containing number of registers
3389 for varargs x86-64 functions. */
3390 if (mode == VOIDmode)
3391 return GEN_INT (cum->maybe_vaarg
3392 ? (cum->sse_nregs < 0
3393 ? X86_64_SSE_REGPARM_MAX
3394 : cum->sse_regno)
3395 : -1);
3396
3397 switch (mode)
3398 {
3399 default:
3400 break;
3401
3402 case E_V16HFmode:
3403 case E_V16BFmode:
3404 case E_V8SFmode:
3405 case E_V8SImode:
3406 case E_V32QImode:
3407 case E_V16HImode:
3408 case E_V4DFmode:
3409 case E_V4DImode:
3410 case E_V32HFmode:
3411 case E_V32BFmode:
3412 case E_V16SFmode:
3413 case E_V16SImode:
3414 case E_V64QImode:
3415 case E_V32HImode:
3416 case E_V8DFmode:
3417 case E_V8DImode:
3418 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
3419 if (!named)
3420 return NULL;
3421 break;
3422 }
3423
  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers [cum->regno],
			      cum->sse_regno);
3428}
3429
3430static rtx
3431function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3432 machine_mode orig_mode, bool named, const_tree type,
3433 HOST_WIDE_INT bytes)
3434{
3435 unsigned int regno;
3436
  /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use a value of -2 to specify that the current function call is
     MS ABI.  */
3439 if (mode == VOIDmode)
3440 return GEN_INT (-2);
3441
3442 /* If we've run out of registers, it goes on the stack. */
3443 if (cum->nregs == 0)
3444 return NULL_RTX;
3445
3446 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3447
3448 /* Only floating point modes are passed in anything but integer regs. */
3449 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3450 {
3451 if (named)
3452 {
3453 if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3454 regno = cum->regno + FIRST_SSE_REG;
3455 }
3456 else
3457 {
3458 rtx t1, t2;
3459
3460 /* Unnamed floating parameters are passed in both the
3461 SSE and integer registers. */
3462 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3463 t2 = gen_rtx_REG (mode, regno);
3464 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3465 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3466 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3467 }
3468 }
  /* Handle aggregate types passed in registers.  */
3470 if (orig_mode == BLKmode)
3471 {
3472 if (bytes > 0 && bytes <= 8)
3473 mode = (bytes > 4 ? DImode : SImode);
3474 if (mode == BLKmode)
3475 mode = DImode;
3476 }
3477
3478 return gen_reg_or_parallel (mode, orig_mode, regno);
3479}
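
/* Illustrative sketch (editorial addition): for an MS-ABI varargs callee,
   an unnamed double in the second argument slot is returned from here
   roughly as

     (parallel:DF [(expr_list (reg:DF xmm1) (const_int 0))
		   (expr_list (reg:DF rdx) (const_int 0))])

   so the caller stores it both in the SSE and in the integer register,
   as the Windows x64 convention requires for unprototyped calls.  */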
3480
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.
3483
3484 ARG describes the argument while CUM gives information about the
3485 preceding args and about the function being called. */
3486
3487static rtx
3488ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3489{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3491 machine_mode mode = arg.mode;
3492 HOST_WIDE_INT bytes, words;
3493 rtx reg;
3494
3495 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3496 {
3497 gcc_assert (arg.type != NULL_TREE);
3498 if (POINTER_TYPE_P (arg.type))
3499 {
3500 /* This is the pointer argument. */
3501 gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
3502 /* It is at -WORD(AP) in the current frame in interrupt and
3503 exception handlers. */
3504 reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3505 }
3506 else
3507 {
3508 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3509 && TREE_CODE (arg.type) == INTEGER_TYPE
3510 && TYPE_MODE (arg.type) == word_mode);
3511 /* The error code is the word-mode integer argument at
3512 -2 * WORD(AP) in the current frame of the exception
3513 handler. */
3514 reg = gen_rtx_MEM (word_mode,
3515 plus_constant (Pmode,
3516 arg_pointer_rtx,
3517 -2 * UNITS_PER_WORD));
3518 }
3519 return reg;
3520 }
3521
3522 bytes = arg.promoted_size_in_bytes ();
3523 words = CEIL (bytes, UNITS_PER_WORD);
3524
3525 /* To simplify the code below, represent vector types with a vector mode
3526 even if MMX/SSE are not active. */
3527 if (arg.type && VECTOR_TYPE_P (arg.type))
    mode = type_natural_mode (arg.type, cum, false);
3529
3530 if (TARGET_64BIT)
3531 {
3532 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3533
3534 if (call_abi == MS_ABI)
	reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
				  arg.type, bytes);
3537 else
	reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3539 }
3540 else
    reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3542
3543 /* Track if there are outgoing arguments on stack. */
3544 if (reg == NULL_RTX && cum->caller)
3545 cfun->machine->outgoing_args_on_stack = true;
3546
3547 return reg;
3548}
3549
3550/* A C expression that indicates when an argument must be passed by
3551 reference. If nonzero for an argument, a copy of that argument is
3552 made in memory and a pointer to the argument is passed instead of
3553 the argument itself. The pointer is passed in whatever way is
3554 appropriate for passing a pointer to that type. */
3555
3556static bool
3557ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3558{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3560
3561 if (TARGET_64BIT)
3562 {
3563 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3564
3565 /* See Windows x64 Software Convention. */
3566 if (call_abi == MS_ABI)
3567 {
3568 HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3569
3570 if (tree type = arg.type)
3571 {
3572 /* Arrays are passed by reference. */
3573 if (TREE_CODE (type) == ARRAY_TYPE)
3574 return true;
3575
3576 if (RECORD_OR_UNION_TYPE_P (type))
3577 {
3578 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3579 are passed by reference. */
3580 msize = int_size_in_bytes (type);
3581 }
3582 }
3583
3584 /* __m128 is passed by reference. */
3585 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3586 }
3587 else if (arg.type && int_size_in_bytes (arg.type) == -1)
3588 return true;
3589 }
3590
3591 return false;
3592}
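
/* Illustrative examples (editorial addition): under the MS 64-bit ABI a
   3-byte struct such as

     struct s { char c[3]; };

   is passed by reference (its size is not 1, 2, 4 or 8 bytes), an
   8-byte struct is passed by value in a register, and arrays and __m128
   are always passed by reference.  Under the SysV ABI only
   variable-sized types take this path.  */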
3593
3594/* Return true when TYPE should be 128bit aligned for 32bit argument
3595 passing ABI. XXX: This function is obsolete and is only used for
3596 checking psABI compatibility with previous versions of GCC. */
3597
3598static bool
3599ix86_compat_aligned_value_p (const_tree type)
3600{
3601 machine_mode mode = TYPE_MODE (type);
3602 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3603 || mode == TDmode
3604 || mode == TFmode
3605 || mode == TCmode)
3606 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3607 return true;
3608 if (TYPE_ALIGN (type) < 128)
3609 return false;
3610
3611 if (AGGREGATE_TYPE_P (type))
3612 {
3613 /* Walk the aggregates recursively. */
3614 switch (TREE_CODE (type))
3615 {
3616 case RECORD_TYPE:
3617 case UNION_TYPE:
3618 case QUAL_UNION_TYPE:
3619 {
3620 tree field;
3621
3622 /* Walk all the structure fields. */
3623 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3624 {
3625 if (TREE_CODE (field) == FIELD_DECL
3626 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3627 return true;
3628 }
3629 break;
3630 }
3631
3632 case ARRAY_TYPE:
	/* Just in case some languages pass arrays by value.  */
3634 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3635 return true;
3636 break;
3637
3638 default:
3639 gcc_unreachable ();
3640 }
3641 }
3642 return false;
3643}
3644
3645/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3646 XXX: This function is obsolete and is only used for checking psABI
3647 compatibility with previous versions of GCC. */
3648
3649static unsigned int
3650ix86_compat_function_arg_boundary (machine_mode mode,
3651 const_tree type, unsigned int align)
3652{
3653 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3654 natural boundaries. */
3655 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3656 {
3657 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3658 make an exception for SSE modes since these require 128bit
3659 alignment.
3660
3661 The handling here differs from field_alignment. ICC aligns MMX
3662 arguments to 4 byte boundaries, while structure fields are aligned
3663 to 8 byte boundaries. */
3664 if (!type)
3665 {
3666 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3667 align = PARM_BOUNDARY;
3668 }
3669 else
3670 {
3671 if (!ix86_compat_aligned_value_p (type))
3672 align = PARM_BOUNDARY;
3673 }
3674 }
3675 if (align > BIGGEST_ALIGNMENT)
3676 align = BIGGEST_ALIGNMENT;
3677 return align;
3678}
3679
3680/* Return true when TYPE should be 128bit aligned for 32bit argument
3681 passing ABI. */
3682
3683static bool
3684ix86_contains_aligned_value_p (const_tree type)
3685{
3686 machine_mode mode = TYPE_MODE (type);
3687
3688 if (mode == XFmode || mode == XCmode)
3689 return false;
3690
3691 if (TYPE_ALIGN (type) < 128)
3692 return false;
3693
3694 if (AGGREGATE_TYPE_P (type))
3695 {
3696 /* Walk the aggregates recursively. */
3697 switch (TREE_CODE (type))
3698 {
3699 case RECORD_TYPE:
3700 case UNION_TYPE:
3701 case QUAL_UNION_TYPE:
3702 {
3703 tree field;
3704
3705 /* Walk all the structure fields. */
3706 for (field = TYPE_FIELDS (type);
3707 field;
3708 field = DECL_CHAIN (field))
3709 {
3710 if (TREE_CODE (field) == FIELD_DECL
3711 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3712 return true;
3713 }
3714 break;
3715 }
3716
3717 case ARRAY_TYPE:
3718	  /* Just in case some languages pass arrays by value.  */
3719 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3720 return true;
3721 break;
3722
3723 default:
3724 gcc_unreachable ();
3725 }
3726 }
3727 else
3728 return TYPE_ALIGN (type) >= 128;
3729
3730 return false;
3731}
3732
3733/* Gives the alignment boundary, in bits, of an argument with the
3734 specified mode and type. */
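/* As an informal illustration (not a complete statement of the psABI):
   in 32-bit mode only arguments that actually contain a 128-bit aligned
   value, such as an __m128 or a struct with an __m128 member, keep their
   128-bit alignment; most other arguments fall back to the 4-byte
   PARM_BOUNDARY.  In 64-bit mode the type's own alignment is used,
   never less than PARM_BOUNDARY.  */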
3735
3736static unsigned int
3737ix86_function_arg_boundary (machine_mode mode, const_tree type)
3738{
3739 unsigned int align;
3740 if (type)
3741 {
3742 /* Since the main variant type is used for call, we convert it to
3743 the main variant type. */
3744 type = TYPE_MAIN_VARIANT (type);
3745 align = TYPE_ALIGN (type);
3746 if (TYPE_EMPTY_P (type))
3747 return PARM_BOUNDARY;
3748 }
3749 else
3750 align = GET_MODE_ALIGNMENT (mode);
3751 if (align < PARM_BOUNDARY)
3752 align = PARM_BOUNDARY;
3753 else
3754 {
3755 static bool warned;
3756 unsigned int saved_align = align;
3757
3758 if (!TARGET_64BIT)
3759 {
3760 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3761 if (!type)
3762 {
3763 if (mode == XFmode || mode == XCmode)
3764 align = PARM_BOUNDARY;
3765 }
3766 else if (!ix86_contains_aligned_value_p (type))
3767 align = PARM_BOUNDARY;
3768
3769 if (align < 128)
3770 align = PARM_BOUNDARY;
3771 }
3772
3773 if (warn_psabi
3774 && !warned
3775 && align != ix86_compat_function_arg_boundary (mode, type,
3776						 saved_align))
3777 {
3778 warned = true;
3779 inform (input_location,
3780 "the ABI for passing parameters with %d-byte"
3781 " alignment has changed in GCC 4.6",
3782 align / BITS_PER_UNIT);
3783 }
3784 }
3785
3786 return align;
3787}
3788
3789/* Return true if N is a possible register number of function value. */
3790
3791static bool
3792ix86_function_value_regno_p (const unsigned int regno)
3793{
3794 switch (regno)
3795 {
3796 case AX_REG:
3797 return true;
3798 case DX_REG:
3799 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3800 case DI_REG:
3801 case SI_REG:
3802 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3803
3804 /* Complex values are returned in %st(0)/%st(1) pair. */
3805 case ST0_REG:
3806 case ST1_REG:
3807 /* TODO: The function should depend on current function ABI but
3808 builtins.cc would need updating then. Therefore we use the
3809 default ABI. */
3810 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3811 return false;
3812 return TARGET_FLOAT_RETURNS_IN_80387;
3813
3814 /* Complex values are returned in %xmm0/%xmm1 pair. */
3815 case XMM0_REG:
3816 case XMM1_REG:
3817 return TARGET_SSE;
3818
3819 case MM0_REG:
3820 if (TARGET_MACHO || TARGET_64BIT)
3821 return false;
3822 return TARGET_MMX;
3823 }
3824
3825 return false;
3826}
3827
3828/* Check whether the register REGNO should be zeroed on X86.
3829   When ALL_SSE_ZEROED is true, all SSE registers have already been
3830   zeroed together, so there is no need to zero them again.
3831 When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3832
3833static bool
3834zero_call_used_regno_p (const unsigned int regno,
3835 bool all_sse_zeroed,
3836 bool need_zero_mmx)
3837{
3838 return GENERAL_REGNO_P (regno)
3839 || (!all_sse_zeroed && SSE_REGNO_P (regno))
3840 || MASK_REGNO_P (regno)
3841 || (need_zero_mmx && MMX_REGNO_P (regno));
3842}
3843
3844/* Return the machine_mode that is used to zero register REGNO. */
3845
3846static machine_mode
3847zero_call_used_regno_mode (const unsigned int regno)
3848{
3849 /* NB: We only need to zero the lower 32 bits for integer registers
3850     and the lower 128 bits for vector registers since the destination
3851     is zero-extended to the full register width.  */
3852 if (GENERAL_REGNO_P (regno))
3853 return SImode;
3854 else if (SSE_REGNO_P (regno))
3855 return V4SFmode;
3856 else if (MASK_REGNO_P (regno))
3857 return HImode;
3858 else if (MMX_REGNO_P (regno))
3859 return V2SImode;
3860 else
3861 gcc_unreachable ();
3862}
3863
3864/* Generate a rtx to zero all vector registers together if possible,
3865 otherwise, return NULL. */
3866
3867static rtx
3868zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
3869{
3870 if (!TARGET_AVX)
3871 return NULL;
3872
3873 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3874 if ((LEGACY_SSE_REGNO_P (regno)
3875 || (TARGET_64BIT
3876 && (REX_SSE_REGNO_P (regno)
3877 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
3878	&& !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3879 return NULL;
3880
3881 return gen_avx_vzeroall ();
3882}
3883
3884/* Generate insns to zero all st registers together.
3885 Return true when zeroing instructions are generated.
3886 Assume the number of st registers that are zeroed is num_of_st,
3887 we will emit the following sequence to zero them together:
3888 fldz; \
3889 fldz; \
3890 ...
3891 fldz; \
3892 fstp %%st(0); \
3893 fstp %%st(0); \
3894 ...
3895 fstp %%st(0);
3896   i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
3897   and mark the stack slots empty.
3898
3899 How to compute the num_of_st:
3900 There is no direct mapping from stack registers to hard register
3901 numbers. If one stack register needs to be cleared, we don't know
3902 where in the stack the value remains. So, if any stack register
3903 needs to be cleared, the whole stack should be cleared. However,
3904 x87 stack registers that hold the return value should be excluded.
3905 x87 returns in the top (two for complex values) register, so
3906 num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
3907   Return the value of num_of_st.  */
3908
3909
3910static int
3911zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
3912{
3913
3914 /* If the FPU is disabled, no need to zero all st registers. */
3915 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3916 return 0;
3917
3918 unsigned int num_of_st = 0;
3919 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3920 if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
3921	&& TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3922 {
3923 num_of_st++;
3924 break;
3925 }
3926
3927 if (num_of_st == 0)
3928 return 0;
3929
3930 bool return_with_x87 = false;
3931 return_with_x87 = (crtl->return_rtx
3932 && (STACK_REG_P (crtl->return_rtx)));
3933
3934 bool complex_return = false;
3935 complex_return = (crtl->return_rtx
3936 && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
3937
3938 if (return_with_x87)
3939 if (complex_return)
3940 num_of_st = 6;
3941 else
3942 num_of_st = 7;
3943 else
3944 num_of_st = 8;
3945
3946 rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
3947 for (unsigned int i = 0; i < num_of_st; i++)
3948 emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
3949
3950 for (unsigned int i = 0; i < num_of_st; i++)
3951 {
3952 rtx insn;
3953 insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
3954 add_reg_note (insn, REG_DEAD, st_reg);
3955 }
3956 return num_of_st;
3957}
3958
3959
3960/* When the routine exits in MMX mode, if any ST register needs
3961 to be zeroed, we should clear all MMX registers except the
3962 RET_MMX_REGNO that holds the return value. */
3963static bool
3964zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
3965 unsigned int ret_mmx_regno)
3966{
3967 bool need_zero_all_mm = false;
3968 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3969 if (STACK_REGNO_P (regno)
3970	&& TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3971 {
3972 need_zero_all_mm = true;
3973 break;
3974 }
3975
3976 if (!need_zero_all_mm)
3977 return false;
3978
3979 machine_mode mode = V2SImode;
3980 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3981 if (regno != ret_mmx_regno)
3982 {
3983 rtx reg = gen_rtx_REG (mode, regno);
3984 emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
3985 }
3986 return true;
3987}
3988
3989/* TARGET_ZERO_CALL_USED_REGS. */
3990/* Generate a sequence of instructions that zero registers specified by
3991 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
3992 zeroed. */
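/* This is the backend half of the -fzero-call-used-regs= option (and of
   the zero_call_used_regs function attribute); the middle end decides
   which hard registers need clearing and passes that set in
   NEED_ZEROED_HARDREGS.  */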
3993static HARD_REG_SET
3994ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
3995{
3996 HARD_REG_SET zeroed_hardregs;
3997 bool all_sse_zeroed = false;
3998 int all_st_zeroed_num = 0;
3999 bool all_mm_zeroed = false;
4000
4001  CLEAR_HARD_REG_SET (zeroed_hardregs);
4002
4003 /* first, let's see whether we can zero all vector registers together. */
4004 rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
4005 if (zero_all_vec_insn)
4006 {
4007 emit_insn (zero_all_vec_insn);
4008 all_sse_zeroed = true;
4009 }
4010
4011  /* mm/st registers share the same register set, so we should follow
4012     these rules to clear them:
4013 MMX exit mode x87 exit mode
4014 -------------|----------------------|---------------
4015 uses x87 reg | clear all MMX | clear all x87
4016 uses MMX reg | clear individual MMX | clear all x87
4017 x87 + MMX | clear all MMX | clear all x87
4018
4019 first, we should decide which mode (MMX mode or x87 mode) the function
4020 exit with. */
4021
4022 bool exit_with_mmx_mode = (crtl->return_rtx
4023 && (MMX_REG_P (crtl->return_rtx)));
4024
4025 if (!exit_with_mmx_mode)
4026 /* x87 exit mode, we should zero all st registers together. */
4027 {
4028 all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);
4029
4030 if (all_st_zeroed_num > 0)
4031 for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
4032 /* x87 stack registers that hold the return value should be excluded.
4033 x87 returns in the top (two for complex values) register. */
4034 if (all_st_zeroed_num == 8
4035 || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
4036 || (all_st_zeroed_num == 6
4037 && (regno == (REGNO (crtl->return_rtx) + 1)))))
4038	    SET_HARD_REG_BIT (zeroed_hardregs, regno);
4039 }
4040 else
4041 /* MMX exit mode, check whether we can zero all mm registers. */
4042 {
4043 unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
4044 all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
4045					      exit_mmx_regno);
4046 if (all_mm_zeroed)
4047 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
4048 if (regno != exit_mmx_regno)
4049	    SET_HARD_REG_BIT (zeroed_hardregs, regno);
4050 }
4051
4052 /* Now, generate instructions to zero all the other registers. */
4053
4054 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4055 {
4056      if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
4057 continue;
4058 if (!zero_call_used_regno_p (regno, all_sse_zeroed,
4059				   exit_with_mmx_mode && !all_mm_zeroed))
4060 continue;
4061
4062      SET_HARD_REG_BIT (zeroed_hardregs, regno);
4063
4064 machine_mode mode = zero_call_used_regno_mode (regno);
4065
4066 rtx reg = gen_rtx_REG (mode, regno);
4067 rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));
4068
4069 switch (mode)
4070 {
4071 case E_SImode:
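	  /* Prefer the xor zeroing idiom (which clobbers the flags, hence
	     the CC clobber added below) unless this tuning prefers
	     mov $0 for clearing a register.  */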
4072 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
4073 {
4074 rtx clob = gen_rtx_CLOBBER (VOIDmode,
4075 gen_rtx_REG (CCmode,
4076 FLAGS_REG));
4077 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
4078 tmp,
4079 clob));
4080 }
4081 /* FALLTHRU. */
4082
4083 case E_V4SFmode:
4084 case E_HImode:
4085 case E_V2SImode:
4086 emit_insn (tmp);
4087 break;
4088
4089 default:
4090 gcc_unreachable ();
4091 }
4092 }
4093 return zeroed_hardregs;
4094}
4095
4096/* Define how to find the value returned by a function.
4097 VALTYPE is the data type of the value (as a tree).
4098 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4099 otherwise, FUNC is 0. */
4100
4101static rtx
4102function_value_32 (machine_mode orig_mode, machine_mode mode,
4103 const_tree fntype, const_tree fn)
4104{
4105 unsigned int regno;
4106
4107 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4108 we normally prevent this case when mmx is not available. However
4109 some ABIs may require the result to be returned like DImode. */
4110 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4111 regno = FIRST_MMX_REG;
4112
4113 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4114 we prevent this case when sse is not available. However some ABIs
4115 may require the result to be returned like integer TImode. */
4116 else if (mode == TImode
4117 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4118 regno = FIRST_SSE_REG;
4119
4120 /* 32-byte vector modes in %ymm0. */
4121 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
4122 regno = FIRST_SSE_REG;
4123
4124 /* 64-byte vector modes in %zmm0. */
4125 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
4126 regno = FIRST_SSE_REG;
4127
4128 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4129 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4130 regno = FIRST_FLOAT_REG;
4131 else
4132 /* Most things go in %eax. */
4133 regno = AX_REG;
4134
4135  /* Return __bf16/_Float16/_Complex _Float16 in an SSE register.  */
4136 if (mode == HFmode || mode == BFmode)
4137 {
4138 if (!TARGET_SSE2)
4139 {
4140 error ("SSE register return with SSE2 disabled");
4141 regno = AX_REG;
4142 }
4143 else
4144 regno = FIRST_SSE_REG;
4145 }
4146
4147 if (mode == HCmode)
4148 {
4149 if (!TARGET_SSE2)
4150 error ("SSE register return with SSE2 disabled");
4151
4152      rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
4153 XVECEXP (ret, 0, 0)
4154 = gen_rtx_EXPR_LIST (VOIDmode,
4155 gen_rtx_REG (SImode,
4156 TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
4157 GEN_INT (0));
4158 return ret;
4159 }
4160
4161 /* Override FP return register with %xmm0 for local functions when
4162 SSE math is enabled or for functions with sseregparm attribute. */
4163 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4164 {
4165      int sse_level = ix86_function_sseregparm (fntype, fn, false);
4166 if (sse_level == -1)
4167 {
4168 error ("calling %qD with SSE calling convention without "
4169 "SSE/SSE2 enabled", fn);
4170 sorry ("this is a GCC bug that can be worked around by adding "
4171 "attribute used to function called");
4172 }
4173 else if ((sse_level >= 1 && mode == SFmode)
4174 || (sse_level == 2 && mode == DFmode))
4175 regno = FIRST_SSE_REG;
4176 }
4177
4178 /* OImode shouldn't be used directly. */
4179 gcc_assert (mode != OImode);
4180
4181 return gen_rtx_REG (orig_mode, regno);
4182}
4183
4184static rtx
4185function_value_64 (machine_mode orig_mode, machine_mode mode,
4186 const_tree valtype)
4187{
4188 rtx ret;
4189
4190 /* Handle libcalls, which don't provide a type node. */
4191 if (valtype == NULL)
4192 {
4193 unsigned int regno;
4194
4195 switch (mode)
4196 {
4197 case E_BFmode:
4198 case E_HFmode:
4199 case E_HCmode:
4200 case E_SFmode:
4201 case E_SCmode:
4202 case E_DFmode:
4203 case E_DCmode:
4204 case E_TFmode:
4205 case E_SDmode:
4206 case E_DDmode:
4207 case E_TDmode:
4208 regno = FIRST_SSE_REG;
4209 break;
4210 case E_XFmode:
4211 case E_XCmode:
4212 regno = FIRST_FLOAT_REG;
4213 break;
4214 case E_TCmode:
4215 return NULL;
4216 default:
4217 regno = AX_REG;
4218 }
4219
4220 return gen_rtx_REG (mode, regno);
4221 }
4222 else if (POINTER_TYPE_P (valtype))
4223 {
4224 /* Pointers are always returned in word_mode. */
4225 mode = word_mode;
4226 }
4227
4228  ret = construct_container (mode, orig_mode, valtype, 1,
4229			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
4230			     x86_64_int_return_registers, 0);
4231
4232 /* For zero sized structures, construct_container returns NULL, but we
4233     need to keep the rest of the compiler happy by returning a meaningful value.  */
4234 if (!ret)
4235 ret = gen_rtx_REG (orig_mode, AX_REG);
4236
4237 return ret;
4238}
4239
4240static rtx
4241function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
4242 const_tree fntype, const_tree fn, const_tree valtype)
4243{
4244 unsigned int regno;
4245
4246 /* Floating point return values in %st(0)
4247 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
4248 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
4249 && (GET_MODE_SIZE (mode) > 8
4250 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
4251 {
4252 regno = FIRST_FLOAT_REG;
4253 return gen_rtx_REG (orig_mode, regno);
4254 }
4255 else
4256    return function_value_32 (orig_mode, mode, fntype, fn);
4257}
4258
4259static rtx
4260function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
4261 const_tree valtype)
4262{
4263 unsigned int regno = AX_REG;
4264
4265 if (TARGET_SSE)
4266 {
4267 switch (GET_MODE_SIZE (mode))
4268 {
4269 case 16:
4270	  if (valtype != NULL_TREE
4271	      && !VECTOR_INTEGER_TYPE_P (valtype)
4273	      && !INTEGRAL_TYPE_P (valtype)
4274	      && !VECTOR_FLOAT_TYPE_P (valtype))
4275 break;
4276 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4277 && !COMPLEX_MODE_P (mode))
4278 regno = FIRST_SSE_REG;
4279 break;
4280 case 8:
4281 case 4:
4282 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
4283 break;
4284 if (mode == SFmode || mode == DFmode)
4285 regno = FIRST_SSE_REG;
4286 break;
4287 default:
4288 break;
4289 }
4290 }
4291 return gen_rtx_REG (orig_mode, regno);
4292}
4293
4294static rtx
4295ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4296 machine_mode orig_mode, machine_mode mode)
4297{
4298 const_tree fn, fntype;
4299
4300 fn = NULL_TREE;
4301 if (fntype_or_decl && DECL_P (fntype_or_decl))
4302 fn = fntype_or_decl;
4303 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4304
4305 if (ix86_function_type_abi (fntype) == MS_ABI)
4306 {
4307 if (TARGET_64BIT)
4308 return function_value_ms_64 (orig_mode, mode, valtype);
4309 else
4310 return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
4311 }
4312 else if (TARGET_64BIT)
4313 return function_value_64 (orig_mode, mode, valtype);
4314 else
4315 return function_value_32 (orig_mode, mode, fntype, fn);
4316}
4317
4318static rtx
4319ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
4320{
4321 machine_mode mode, orig_mode;
4322
4323 orig_mode = TYPE_MODE (valtype);
4324  mode = type_natural_mode (valtype, NULL, true);
4325 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4326}
4327
4328/* Pointer function arguments and return values are promoted to
4329 word_mode for normal functions. */
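/* This only has a visible effect when pointers are narrower than
   word_mode, i.e. for x32 (-mx32), where 32-bit pointers are passed and
   returned zero-extended to 64 bits.  */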
4330
4331static machine_mode
4332ix86_promote_function_mode (const_tree type, machine_mode mode,
4333 int *punsignedp, const_tree fntype,
4334 int for_return)
4335{
4336 if (cfun->machine->func_type == TYPE_NORMAL
4337 && type != NULL_TREE
4338 && POINTER_TYPE_P (type))
4339 {
4340 *punsignedp = POINTERS_EXTEND_UNSIGNED;
4341 return word_mode;
4342 }
4343 return default_promote_function_mode (type, mode, punsignedp, fntype,
4344 for_return);
4345}
4346
4347/* Return true if a structure, union or array with MODE containing FIELD
4348 should be accessed using BLKmode. */
4349
4350static bool
4351ix86_member_type_forces_blk (const_tree field, machine_mode mode)
4352{
4353 /* Union with XFmode must be in BLKmode. */
4354 return (mode == XFmode
4355 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
4356 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
4357}
4358
4359rtx
4360ix86_libcall_value (machine_mode mode)
4361{
4362  return ix86_function_value_1 (NULL, NULL, mode, mode);
4363}
4364
4365/* Return true iff type is returned in memory. */
4366
4367static bool
4368ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4369{
4370  const machine_mode mode = type_natural_mode (type, NULL, true);
4371 HOST_WIDE_INT size;
4372
4373 if (TARGET_64BIT)
4374 {
4375 if (ix86_function_type_abi (fntype) == MS_ABI)
4376 {
4377 size = int_size_in_bytes (type);
4378
4379 /* __m128 is returned in xmm0. */
4380 if ((!type || VECTOR_INTEGER_TYPE_P (type)
4381 || INTEGRAL_TYPE_P (type)
4382 || VECTOR_FLOAT_TYPE_P (type))
4383 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4384 && !COMPLEX_MODE_P (mode)
4385 && (GET_MODE_SIZE (mode) == 16 || size == 16))
4386 return false;
4387
4388 /* Otherwise, the size must be exactly in [1248]. */
4389 return size != 1 && size != 2 && size != 4 && size != 8;
4390 }
4391 else
4392 {
4393 int needed_intregs, needed_sseregs;
4394
4395	  return examine_argument (mode, type, 1,
4396				   &needed_intregs, &needed_sseregs);
4397 }
4398 }
4399 else
4400 {
4401 size = int_size_in_bytes (type);
4402
4403 /* Intel MCU psABI returns scalars and aggregates no larger than 8
4404 bytes in registers. */
4405 if (TARGET_IAMCU)
4406 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
4407
4408 if (mode == BLKmode)
4409 return true;
4410
4411 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4412 return false;
4413
4414 if (VECTOR_MODE_P (mode) || mode == TImode)
4415 {
4416 /* User-created vectors small enough to fit in EAX. */
4417 if (size < 8)
4418 return false;
4419
4420	  /* Unless ABI prescribes otherwise,
4421 MMX/3dNow values are returned in MM0 if available. */
4422
4423 if (size == 8)
4424 return TARGET_VECT8_RETURNS || !TARGET_MMX;
4425
4426 /* SSE values are returned in XMM0 if available. */
4427 if (size == 16)
4428 return !TARGET_SSE;
4429
4430 /* AVX values are returned in YMM0 if available. */
4431 if (size == 32)
4432 return !TARGET_AVX;
4433
4434 /* AVX512F values are returned in ZMM0 if available. */
4435 if (size == 64)
4436 return !TARGET_AVX512F;
4437 }
4438
4439 if (mode == XFmode)
4440 return false;
4441
4442 if (size > 12)
4443 return true;
4444
4445 /* OImode shouldn't be used directly. */
4446 gcc_assert (mode != OImode);
4447
4448 return false;
4449 }
4450}
4451
4452/* Implement TARGET_PUSH_ARGUMENT. */
4453
4454static bool
4455ix86_push_argument (unsigned int npush)
4456{
4457 /* If SSE2 is available, use vector move to put large argument onto
4458 stack. NB: In 32-bit mode, use 8-byte vector move. */
4459 return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
4460 && TARGET_PUSH_ARGS
4461 && !ACCUMULATE_OUTGOING_ARGS);
4462}
4463
4464
4465/* Create the va_list data type. */
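/* For the 64-bit SysV ABI the record built below corresponds to the
   familiar C-level definition (shown here for illustration only):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */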
4466
4467static tree
4468ix86_build_builtin_va_list_64 (void)
4469{
4470 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4471
4472 record = lang_hooks.types.make_type (RECORD_TYPE);
4473 type_decl = build_decl (BUILTINS_LOCATION,
4474 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4475
4476 f_gpr = build_decl (BUILTINS_LOCATION,
4477 FIELD_DECL, get_identifier ("gp_offset"),
4478 unsigned_type_node);
4479 f_fpr = build_decl (BUILTINS_LOCATION,
4480 FIELD_DECL, get_identifier ("fp_offset"),
4481 unsigned_type_node);
4482 f_ovf = build_decl (BUILTINS_LOCATION,
4483 FIELD_DECL, get_identifier ("overflow_arg_area"),
4484 ptr_type_node);
4485 f_sav = build_decl (BUILTINS_LOCATION,
4486 FIELD_DECL, get_identifier ("reg_save_area"),
4487 ptr_type_node);
4488
4489 va_list_gpr_counter_field = f_gpr;
4490 va_list_fpr_counter_field = f_fpr;
4491
4492 DECL_FIELD_CONTEXT (f_gpr) = record;
4493 DECL_FIELD_CONTEXT (f_fpr) = record;
4494 DECL_FIELD_CONTEXT (f_ovf) = record;
4495 DECL_FIELD_CONTEXT (f_sav) = record;
4496
4497 TYPE_STUB_DECL (record) = type_decl;
4498 TYPE_NAME (record) = type_decl;
4499 TYPE_FIELDS (record) = f_gpr;
4500 DECL_CHAIN (f_gpr) = f_fpr;
4501 DECL_CHAIN (f_fpr) = f_ovf;
4502 DECL_CHAIN (f_ovf) = f_sav;
4503
4504 layout_type (record);
4505
4506 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
4507 NULL_TREE, TYPE_ATTRIBUTES (record));
4508
4509 /* The correct type is an array type of one element. */
4510 return build_array_type (record, build_index_type (size_zero_node));
4511}
4512
4513/* Setup the builtin va_list data type and for 64-bit the additional
4514 calling convention specific va_list data types. */
4515
4516static tree
4517ix86_build_builtin_va_list (void)
4518{
4519 if (TARGET_64BIT)
4520 {
4521 /* Initialize ABI specific va_list builtin types.
4522
4523 In lto1, we can encounter two va_list types:
4524 - one as a result of the type-merge across TUs, and
4525 - the one constructed here.
4526 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4527 a type identity check in canonical_va_list_type based on
4528 TYPE_MAIN_VARIANT (which we used to have) will not work.
4529 Instead, we tag each va_list_type_node with its unique attribute, and
4530 look for the attribute in the type identity check in
4531 canonical_va_list_type.
4532
4533 Tagging sysv_va_list_type_node directly with the attribute is
4534       problematic since it's an array of one record, which will degrade into a
4535 pointer to record when used as parameter (see build_va_arg comments for
4536 an example), dropping the attribute in the process. So we tag the
4537 record instead. */
4538
4539 /* For SYSV_ABI we use an array of one record. */
4540 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
4541
4542 /* For MS_ABI we use plain pointer to argument area. */
4543 tree char_ptr_type = build_pointer_type (char_type_node);
4544 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
4545 TYPE_ATTRIBUTES (char_ptr_type));
4546 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
4547
4548 return ((ix86_abi == MS_ABI)
4549 ? ms_va_list_type_node
4550 : sysv_va_list_type_node);
4551 }
4552 else
4553 {
4554 /* For i386 we use plain pointer to argument area. */
4555 return build_pointer_type (char_type_node);
4556 }
4557}
4558
4559/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
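/* A hedged sketch of the register save area this function lays out
   (offsets are from frame_pointer_rtx; sizes assume the SysV values
   X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8):

     [fp +   0]  %rdi %rsi %rdx %rcx %r8 %r9      6 * 8  =  48 bytes
     [fp +  48]  %xmm0 ... %xmm7                  8 * 16 = 128 bytes

   Only the parts that va_arg might still reference are actually stored;
   see the gpr/fpr size computation below.  */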
4560
4561static void
4562setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4563{
4564 rtx save_area, mem;
4565 alias_set_type set;
4566 int i, max;
4567
4568 /* GPR size of varargs save area. */
4569 if (cfun->va_list_gpr_size)
4570 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
4571 else
4572 ix86_varargs_gpr_size = 0;
4573
4574 /* FPR size of varargs save area. We don't need it if we don't pass
4575 anything in SSE registers. */
4576 if (TARGET_SSE && cfun->va_list_fpr_size)
4577 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
4578 else
4579 ix86_varargs_fpr_size = 0;
4580
4581 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4582 return;
4583
4584 save_area = frame_pointer_rtx;
4585 set = get_varargs_alias_set ();
4586
4587 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4588 if (max > X86_64_REGPARM_MAX)
4589 max = X86_64_REGPARM_MAX;
4590
4591 for (i = cum->regno; i < max; i++)
4592 {
4593 mem = gen_rtx_MEM (word_mode,
4594 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4595 MEM_NOTRAP_P (mem) = 1;
4596 set_mem_alias_set (mem, set);
4597 emit_move_insn (mem,
4598 gen_rtx_REG (word_mode,
4599 x86_64_int_parameter_registers[i]));
4600 }
4601
4602 if (ix86_varargs_fpr_size)
4603 {
4604 machine_mode smode;
4605 rtx_code_label *label;
4606 rtx test;
4607
4608 /* Now emit code to save SSE registers. The AX parameter contains number
4609 of SSE parameter registers used to call this function, though all we
4610 actually check here is the zero/non-zero status. */
4611
4612 label = gen_label_rtx ();
4613 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4614 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4615 label));
4616
4617 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4618 we used movdqa (i.e. TImode) instead? Perhaps even better would
4619 be if we could determine the real mode of the data, via a hook
4620 into pass_stdarg. Ignore all that for now. */
4621 smode = V4SFmode;
4622 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4623 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4624
4625 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4626 if (max > X86_64_SSE_REGPARM_MAX)
4627 max = X86_64_SSE_REGPARM_MAX;
4628
4629 for (i = cum->sse_regno; i < max; ++i)
4630 {
4631 mem = plus_constant (Pmode, save_area,
4632 i * 16 + ix86_varargs_gpr_size);
4633 mem = gen_rtx_MEM (smode, mem);
4634 MEM_NOTRAP_P (mem) = 1;
4635 set_mem_alias_set (mem, set);
4636 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4637
4638 emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4639 }
4640
4641 emit_label (label);
4642 }
4643}
4644
4645static void
4646setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4647{
4648 alias_set_type set = get_varargs_alias_set ();
4649 int i;
4650
4651 /* Reset to zero, as there might be a sysv vaarg used
4652 before. */
4653 ix86_varargs_gpr_size = 0;
4654 ix86_varargs_fpr_size = 0;
4655
4656 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4657 {
4658 rtx reg, mem;
4659
4660 mem = gen_rtx_MEM (Pmode,
4661 plus_constant (Pmode, virtual_incoming_args_rtx,
4662 i * UNITS_PER_WORD));
4663 MEM_NOTRAP_P (mem) = 1;
4664 set_mem_alias_set (mem, set);
4665
4666 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4667 emit_move_insn (mem, reg);
4668 }
4669}
4670
4671static void
4672ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4673 const function_arg_info &arg,
4674 int *, int no_rtl)
4675{
4676  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4677 CUMULATIVE_ARGS next_cum;
4678 tree fntype;
4679
4680 /* This argument doesn't appear to be used anymore. Which is good,
4681 because the old code here didn't suppress rtl generation. */
4682 gcc_assert (!no_rtl);
4683
4684 if (!TARGET_64BIT)
4685 return;
4686
4687 fntype = TREE_TYPE (current_function_decl);
4688
4689 /* For varargs, we do not want to skip the dummy va_dcl argument.
4690 For stdargs, we do want to skip the last named argument. */
4691 next_cum = *cum;
4692 if ((!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
4693 || arg.type != NULL_TREE)
4694 && stdarg_p (fntype))
4695    ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4696
4697 if (cum->call_abi == MS_ABI)
4698 setup_incoming_varargs_ms_64 (&next_cum);
4699 else
4700 setup_incoming_varargs_64 (&next_cum);
4701}
4702
4703/* Checks if TYPE is of kind va_list char *. */
4704
4705static bool
4706is_va_list_char_pointer (tree type)
4707{
4708 tree canonic;
4709
4710 /* For 32-bit it is always true. */
4711 if (!TARGET_64BIT)
4712 return true;
4713 canonic = ix86_canonical_va_list_type (type);
4714 return (canonic == ms_va_list_type_node
4715 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4716}
4717
4718/* Implement va_start. */
4719
4720static void
4721ix86_va_start (tree valist, rtx nextarg)
4722{
4723 HOST_WIDE_INT words, n_gpr, n_fpr;
4724 tree f_gpr, f_fpr, f_ovf, f_sav;
4725 tree gpr, fpr, ovf, sav, t;
4726 tree type;
4727 rtx ovf_rtx;
4728
4729 if (flag_split_stack
4730 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4731 {
4732 unsigned int scratch_regno;
4733
4734 /* When we are splitting the stack, we can't refer to the stack
4735 arguments using internal_arg_pointer, because they may be on
4736 the old stack. The split stack prologue will arrange to
4737 leave a pointer to the old stack arguments in a scratch
4738 register, which we here copy to a pseudo-register. The split
4739 stack prologue can't set the pseudo-register directly because
4740 it (the prologue) runs before any registers have been saved. */
4741
4742 scratch_regno = split_stack_prologue_scratch_regno ();
4743 if (scratch_regno != INVALID_REGNUM)
4744 {
4745 rtx reg;
4746 rtx_insn *seq;
4747
4748 reg = gen_reg_rtx (Pmode);
4749 cfun->machine->split_stack_varargs_pointer = reg;
4750
4751 start_sequence ();
4752 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4753 seq = end_sequence ();
4754
4755 push_topmost_sequence ();
4756 emit_insn_after (seq, entry_of_function ());
4757 pop_topmost_sequence ();
4758 }
4759 }
4760
4761 /* Only 64bit target needs something special. */
4762 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4763 {
4764 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4765 std_expand_builtin_va_start (valist, nextarg);
4766 else
4767 {
4768 rtx va_r, next;
4769
4770	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4771 next = expand_binop (ptr_mode, add_optab,
4772 cfun->machine->split_stack_varargs_pointer,
4773 crtl->args.arg_offset_rtx,
4774 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4775 convert_move (va_r, next, 0);
4776 }
4777 return;
4778 }
4779
4780 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4781 f_fpr = DECL_CHAIN (f_gpr);
4782 f_ovf = DECL_CHAIN (f_fpr);
4783 f_sav = DECL_CHAIN (f_ovf);
4784
4785 valist = build_simple_mem_ref (valist);
4786 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4787 /* The following should be folded into the MEM_REF offset. */
4788 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4789 f_gpr, NULL_TREE);
4790 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4791 f_fpr, NULL_TREE);
4792 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4793 f_ovf, NULL_TREE);
4794 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4795 f_sav, NULL_TREE);
4796
4797 /* Count number of gp and fp argument registers used. */
4798 words = crtl->args.info.words;
4799 n_gpr = crtl->args.info.regno;
4800 n_fpr = crtl->args.info.sse_regno;
4801
4802 if (cfun->va_list_gpr_size)
4803 {
4804 type = TREE_TYPE (gpr);
4805 t = build2 (MODIFY_EXPR, type,
4806 gpr, build_int_cst (type, n_gpr * 8));
4807 TREE_SIDE_EFFECTS (t) = 1;
4808      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4809 }
4810
4811 if (TARGET_SSE && cfun->va_list_fpr_size)
4812 {
4813 type = TREE_TYPE (fpr);
4814 t = build2 (MODIFY_EXPR, type, fpr,
4815 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4816 TREE_SIDE_EFFECTS (t) = 1;
4817      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4818 }
4819
4820 /* Find the overflow area. */
4821 type = TREE_TYPE (ovf);
4822 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4823 ovf_rtx = crtl->args.internal_arg_pointer;
4824 else
4825 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4826 t = make_tree (type, ovf_rtx);
4827 if (words != 0)
4828 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4829
4830 t = build2 (MODIFY_EXPR, type, ovf, t);
4831 TREE_SIDE_EFFECTS (t) = 1;
4832  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4833
4834 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4835 {
4836 /* Find the register save area.
4837	 The function prologue saves it right above the stack frame.  */
4838 type = TREE_TYPE (sav);
4839 t = make_tree (type, frame_pointer_rtx);
4840 if (!ix86_varargs_gpr_size)
4841 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4842
4843 t = build2 (MODIFY_EXPR, type, sav, t);
4844 TREE_SIDE_EFFECTS (t) = 1;
4845      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4846 }
4847}
4848
4849/* Implement va_arg. */
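/* For a simple integer argument the gimple sequence built below is
   roughly equivalent to (an illustrative sketch only, using the va_list
   fields defined above):

     if (ap->gp_offset < 6 * 8)
       {
	 addr = ap->reg_save_area + ap->gp_offset;
	 ap->gp_offset += 8;
       }
     else
       {
	 addr = ap->overflow_arg_area;
	 ap->overflow_arg_area += 8;
       }
     result = *(TYPE *) addr;

   SSE-class and aggregate arguments follow the same pattern, using
   fp_offset and 16-byte slots and possibly a temporary copy.  */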
4850
4851static tree
4852ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4853 gimple_seq *post_p)
4854{
4855 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4856 tree f_gpr, f_fpr, f_ovf, f_sav;
4857 tree gpr, fpr, ovf, sav, t;
4858 int size, rsize;
4859 tree lab_false, lab_over = NULL_TREE;
4860 tree addr, t2;
4861 rtx container;
4862 int indirect_p = 0;
4863 tree ptrtype;
4864 machine_mode nat_mode;
4865 unsigned int arg_boundary;
4866 unsigned int type_align;
4867
4868 /* Only 64bit target needs something special. */
4869 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4870 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4871
4872 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4873 f_fpr = DECL_CHAIN (f_gpr);
4874 f_ovf = DECL_CHAIN (f_fpr);
4875 f_sav = DECL_CHAIN (f_ovf);
4876
4877 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4878 valist, f_gpr, NULL_TREE);
4879
4880 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4881 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4882 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4883
4884 indirect_p = pass_va_arg_by_reference (type);
4885 if (indirect_p)
4886 type = build_pointer_type (type);
4887 size = arg_int_size_in_bytes (type);
4888 rsize = CEIL (size, UNITS_PER_WORD);
4889
4890  nat_mode = type_natural_mode (type, NULL, false);
4891 switch (nat_mode)
4892 {
4893 case E_V16HFmode:
4894 case E_V16BFmode:
4895 case E_V8SFmode:
4896 case E_V8SImode:
4897 case E_V32QImode:
4898 case E_V16HImode:
4899 case E_V4DFmode:
4900 case E_V4DImode:
4901 case E_V32HFmode:
4902 case E_V32BFmode:
4903 case E_V16SFmode:
4904 case E_V16SImode:
4905 case E_V64QImode:
4906 case E_V32HImode:
4907 case E_V8DFmode:
4908 case E_V8DImode:
4909 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
4910 if (!TARGET_64BIT_MS_ABI)
4911 {
4912 container = NULL;
4913 break;
4914 }
4915 /* FALLTHRU */
4916
4917 default:
4918      container = construct_container (nat_mode, TYPE_MODE (type),
4919				       type, 0, X86_64_REGPARM_MAX,
4920				       X86_64_SSE_REGPARM_MAX, intreg,
4921				       0);
4922 break;
4923 }
4924
4925 /* Pull the value out of the saved registers. */
4926
4927 addr = create_tmp_var (ptr_type_node, "addr");
4928 type_align = TYPE_ALIGN (type);
4929
4930 if (container)
4931 {
4932 int needed_intregs, needed_sseregs;
4933 bool need_temp;
4934 tree int_addr, sse_addr;
4935
4936 lab_false = create_artificial_label (UNKNOWN_LOCATION);
4937 lab_over = create_artificial_label (UNKNOWN_LOCATION);
4938
4939      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4940
4941 bool container_in_reg = false;
4942 if (REG_P (container))
4943 container_in_reg = true;
4944 else if (GET_CODE (container) == PARALLEL
4945 && GET_MODE (container) == BLKmode
4946 && XVECLEN (container, 0) == 1)
4947 {
4948 /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST
4949 expression in a TImode register. In this case, temp isn't
4950 needed. Otherwise, the TImode variable will be put in the
4951 GPR save area which guarantees only 8-byte alignment. */
4952 rtx x = XVECEXP (container, 0, 0);
4953 if (GET_CODE (x) == EXPR_LIST
4954 && REG_P (XEXP (x, 0))
4955 && XEXP (x, 1) == const0_rtx)
4956 container_in_reg = true;
4957 }
4958
4959 need_temp = (!container_in_reg
4960 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4961 || TYPE_ALIGN (type) > 128));
4962
4963      /* In case we are passing a structure, verify that it is a consecutive
4964	 block in the register save area.  If not, we need to do moves.  */
4965 if (!need_temp && !container_in_reg)
4966 {
4967 /* Verify that all registers are strictly consecutive */
4968 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4969 {
4970 int i;
4971
4972 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4973 {
4974 rtx slot = XVECEXP (container, 0, i);
4975 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4976 || INTVAL (XEXP (slot, 1)) != i * 16)
4977 need_temp = true;
4978 }
4979 }
4980 else
4981 {
4982 int i;
4983
4984 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4985 {
4986 rtx slot = XVECEXP (container, 0, i);
4987 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4988 || INTVAL (XEXP (slot, 1)) != i * 8)
4989 need_temp = true;
4990 }
4991 }
4992 }
4993 if (!need_temp)
4994 {
4995 int_addr = addr;
4996 sse_addr = addr;
4997 }
4998 else
4999 {
5000 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5001 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5002 }
5003
5004 /* First ensure that we fit completely in registers. */
5005 if (needed_intregs)
5006 {
5007 t = build_int_cst (TREE_TYPE (gpr),
5008 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
5009 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5010 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5011 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5012 gimplify_and_add (t, pre_p);
5013 }
5014 if (needed_sseregs)
5015 {
5016 t = build_int_cst (TREE_TYPE (fpr),
5017 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5018 + X86_64_REGPARM_MAX * 8);
5019 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5020 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5021 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5022 gimplify_and_add (t, pre_p);
5023 }
5024
5025 /* Compute index to start of area used for integer regs. */
5026 if (needed_intregs)
5027 {
5028 /* int_addr = gpr + sav; */
5029 t = fold_build_pointer_plus (sav, gpr);
5030 gimplify_assign (int_addr, t, pre_p);
5031 }
5032 if (needed_sseregs)
5033 {
5034 /* sse_addr = fpr + sav; */
5035 t = fold_build_pointer_plus (sav, fpr);
5036 gimplify_assign (sse_addr, t, pre_p);
5037 }
5038 if (need_temp)
5039 {
5040 int i, prev_size = 0;
5041 tree temp = create_tmp_var (type, "va_arg_tmp");
5042 TREE_ADDRESSABLE (temp) = 1;
5043
5044 /* addr = &temp; */
5045 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5046 gimplify_assign (addr, t, pre_p);
5047
5048 for (i = 0; i < XVECLEN (container, 0); i++)
5049 {
5050 rtx slot = XVECEXP (container, 0, i);
5051 rtx reg = XEXP (slot, 0);
5052 machine_mode mode = GET_MODE (reg);
5053 tree piece_type;
5054 tree addr_type;
5055 tree daddr_type;
5056 tree src_addr, src;
5057 int src_offset;
5058 tree dest_addr, dest;
5059 int cur_size = GET_MODE_SIZE (mode);
5060
5061 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
5062 prev_size = INTVAL (XEXP (slot, 1));
5063 if (prev_size + cur_size > size)
5064 {
5065 cur_size = size - prev_size;
5066 unsigned int nbits = cur_size * BITS_PER_UNIT;
5067	      if (!int_mode_for_size (nbits, 1).exists (&mode))
5068 mode = QImode;
5069 }
5070 piece_type = lang_hooks.types.type_for_mode (mode, 1);
5071 if (mode == GET_MODE (reg))
5072 addr_type = build_pointer_type (piece_type);
5073 else
5074 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
5075 true);
5076 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
5077 true);
5078
5079 if (SSE_REGNO_P (REGNO (reg)))
5080 {
5081 src_addr = sse_addr;
5082 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5083 }
5084 else
5085 {
5086 src_addr = int_addr;
5087 src_offset = REGNO (reg) * 8;
5088 }
5089 src_addr = fold_convert (addr_type, src_addr);
5090 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
5091
5092 dest_addr = fold_convert (daddr_type, addr);
5093 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
5094 if (cur_size == GET_MODE_SIZE (mode))
5095 {
5096 src = build_va_arg_indirect_ref (src_addr);
5097 dest = build_va_arg_indirect_ref (dest_addr);
5098
5099 gimplify_assign (dest, src, pre_p);
5100 }
5101 else
5102 {
5103 tree copy
5104		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
5105 3, dest_addr, src_addr,
5106 size_int (cur_size));
5107 gimplify_and_add (copy, pre_p);
5108 }
5109 prev_size += cur_size;
5110 }
5111 }
5112
5113 if (needed_intregs)
5114 {
5115 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5116 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5117 gimplify_assign (gpr, t, pre_p);
5118 /* The GPR save area guarantees only 8-byte alignment. */
5119 if (!need_temp)
5120 type_align = MIN (type_align, 64);
5121 }
5122
5123 if (needed_sseregs)
5124 {
5125 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5126 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5127 gimplify_assign (unshare_expr (fpr), t, pre_p);
5128 }
5129
5130      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
5131
5132      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
5133 }
5134
5135 /* ... otherwise out of the overflow area. */
5136
5137 /* When we align parameter on stack for caller, if the parameter
5138 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
5139 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
5140 here with caller. */
5141 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
5142 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
5143 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
5144
5145 /* Care for on-stack alignment if needed. */
5146 if (arg_boundary <= 64 || size == 0)
5147 t = ovf;
5148 else
5149 {
5150 HOST_WIDE_INT align = arg_boundary / 8;
5151 t = fold_build_pointer_plus_hwi (ovf, align - 1);
5152 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5153 build_int_cst (TREE_TYPE (t), -align));
5154 }
5155
5156 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5157 gimplify_assign (addr, t, pre_p);
5158
5159 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
5160 gimplify_assign (unshare_expr (ovf), t, pre_p);
5161
5162 if (container)
5163    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
5164
5165 type = build_aligned_type (type, type_align);
5166 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
5167 addr = fold_convert (ptrtype, addr);
5168
5169 if (indirect_p)
5170 addr = build_va_arg_indirect_ref (addr);
5171 return build_va_arg_indirect_ref (addr);
5172}
5173
5174/* Return true if OPNUM's MEM should be matched
5175 in movabs* patterns. */
5176
5177bool
5178ix86_check_movabs (rtx insn, int opnum)
5179{
5180 rtx set, mem;
5181
5182 set = PATTERN (insn);
5183 if (GET_CODE (set) == PARALLEL)
5184 set = XVECEXP (set, 0, 0);
5185 gcc_assert (GET_CODE (set) == SET);
5186 mem = XEXP (set, opnum);
5187 while (SUBREG_P (mem))
5188 mem = SUBREG_REG (mem);
5189 gcc_assert (MEM_P (mem));
5190 return volatile_ok || !MEM_VOLATILE_P (mem);
5191}
5192
5193/* Return true if XVECEXP idx of INSN satisfies MOVS arguments. */
5194bool
5195ix86_check_movs (rtx insn, int idx)
5196{
5197 rtx pat = PATTERN (insn);
5198 gcc_assert (GET_CODE (pat) == PARALLEL);
5199
5200 rtx set = XVECEXP (pat, 0, idx);
5201 gcc_assert (GET_CODE (set) == SET);
5202
5203 rtx dst = SET_DEST (set);
5204 gcc_assert (MEM_P (dst));
5205
5206 rtx src = SET_SRC (set);
5207 gcc_assert (MEM_P (src));
5208
5209 return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst))
5210 && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))
5211 || Pmode == word_mode));
5212}
5213
5214/* Return false if INSN contains a MEM with a non-default address space. */
5215bool
5216ix86_check_no_addr_space (rtx insn)
5217{
5218 subrtx_var_iterator::array_type array;
5219 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
5220 {
5221 rtx x = *iter;
5222 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
5223 return false;
5224 }
5225 return true;
5226}
5227
5228/* Initialize the table of extra 80387 mathematical constants. */
5229
5230static void
5231init_ext_80387_constants (void)
5232{
5233 static const char * cst[5] =
5234 {
5235 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5236 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5237 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5238 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5239 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5240 };
5241 int i;
5242
5243 for (i = 0; i < 5; i++)
5244 {
5245 real_from_string (&ext_80387_constants_table[i], cst[i]);
5246 /* Ensure each constant is rounded to XFmode precision. */
5247 real_convert (&ext_80387_constants_table[i],
5248 XFmode, &ext_80387_constants_table[i]);
5249 }
5250
5251 ext_80387_constants_init = 1;
5252}
5253
5254/* Return non-zero if the constant is something that
5255 can be loaded with a special instruction. */
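/* The return value encodes which instruction to use:
     -1  X is not an x87-mode CONST_DOUBLE
      0  not a special constant
      1  fldz      3  fldlg2    5  fldl2e    7  fldpi
      2  fld1      4  fldln2    6  fldl2t
      8  -0.0 (split as fldz; fchs)
      9  -1.0 (split as fld1; fchs)  */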
5256
5257int
5258standard_80387_constant_p (rtx x)
5259{
5260 machine_mode mode = GET_MODE (x);
5261
5262 const REAL_VALUE_TYPE *r;
5263
5264 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
5265 return -1;
5266
5267 if (x == CONST0_RTX (mode))
5268 return 1;
5269 if (x == CONST1_RTX (mode))
5270 return 2;
5271
5272 r = CONST_DOUBLE_REAL_VALUE (x);
5273
5274 /* For XFmode constants, try to find a special 80387 instruction when
5275 optimizing for size or on those CPUs that benefit from them. */
5276 if (mode == XFmode
5277 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
5278 && !flag_rounding_math)
5279 {
5280 int i;
5281
5282 if (! ext_80387_constants_init)
5283 init_ext_80387_constants ();
5284
5285 for (i = 0; i < 5; i++)
5286 if (real_identical (r, &ext_80387_constants_table[i]))
5287 return i + 3;
5288 }
5289
5290 /* Load of the constant -0.0 or -1.0 will be split as
5291 fldz;fchs or fld1;fchs sequence. */
5292 if (real_isnegzero (r))
5293 return 8;
5294 if (real_identical (r, &dconstm1))
5295 return 9;
5296
5297 return 0;
5298}
5299
5300/* Return the opcode of the special instruction to be used to load
5301 the constant X. */
5302
5303const char *
5304standard_80387_constant_opcode (rtx x)
5305{
5306 switch (standard_80387_constant_p (x))
5307 {
5308 case 1:
5309 return "fldz";
5310 case 2:
5311 return "fld1";
5312 case 3:
5313 return "fldlg2";
5314 case 4:
5315 return "fldln2";
5316 case 5:
5317 return "fldl2e";
5318 case 6:
5319 return "fldl2t";
5320 case 7:
5321 return "fldpi";
5322 case 8:
5323 case 9:
5324 return "#";
5325 default:
5326 gcc_unreachable ();
5327 }
5328}
5329
5330/* Return the CONST_DOUBLE representing the 80387 constant that is
5331 loaded by the specified special instruction. The argument IDX
5332 matches the return value from standard_80387_constant_p. */
5333
5334rtx
5335standard_80387_constant_rtx (int idx)
5336{
5337 int i;
5338
5339 if (! ext_80387_constants_init)
5340 init_ext_80387_constants ();
5341
5342 switch (idx)
5343 {
5344 case 3:
5345 case 4:
5346 case 5:
5347 case 6:
5348 case 7:
5349 i = idx - 3;
5350 break;
5351
5352 default:
5353 gcc_unreachable ();
5354 }
5355
5356 return const_double_from_real_value (ext_80387_constants_table[i],
5357 XFmode);
5358}
5359
5360/* Return 1 if X is all bits 0, 2 if X is all bits 1
5361 and 3 if X is all bits 1 with zero extend
5362 in supported SSE/AVX vector mode. */
5363
5364int
5365standard_sse_constant_p (rtx x, machine_mode pred_mode)
5366{
5367 machine_mode mode;
5368
5369 if (!TARGET_SSE)
5370 return 0;
5371
5372 mode = GET_MODE (x);
5373
5374 if (x == const0_rtx || const0_operand (x, mode))
5375 return 1;
5376
5377 if (x == constm1_rtx
5378 || vector_all_ones_operand (x, mode)
5379 || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5380 || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
5381 && float_vector_all_ones_operand (x, mode)))
5382 {
5383 /* VOIDmode integer constant, get mode from the predicate. */
5384 if (mode == VOIDmode)
5385 mode = pred_mode;
5386
5387 switch (GET_MODE_SIZE (mode))
5388 {
5389 case 64:
5390 if (TARGET_AVX512F)
5391 return 2;
5392 break;
5393 case 32:
5394 if (TARGET_AVX2)
5395 return 2;
5396 break;
5397 case 16:
5398 if (TARGET_SSE2)
5399 return 2;
5400 break;
5401 case 0:
5402 /* VOIDmode */
5403 gcc_unreachable ();
5404 default:
5405 break;
5406 }
5407 }
5408
5409 if (vector_all_ones_zero_extend_half_operand (x, mode)
5410 || vector_all_ones_zero_extend_quarter_operand (x, mode))
5411 return 3;
5412
5413 return 0;
5414}
5415
5416/* Return the opcode of the special instruction to be used to load
5417 the constant operands[1] into operands[0]. */
5418
5419const char *
5420standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
5421{
5422 machine_mode mode;
5423 rtx x = operands[1];
5424
5425 gcc_assert (TARGET_SSE);
5426
5427 mode = GET_MODE (x);
5428
5429 if (x == const0_rtx || const0_operand (x, mode))
5430 {
5431 switch (get_attr_mode (insn))
5432 {
5433 case MODE_TI:
5434 if (!EXT_REX_SSE_REG_P (operands[0]))
5435 return "%vpxor\t%0, %d0";
5436 /* FALLTHRU */
5437 case MODE_XI:
5438 case MODE_OI:
5439 if (EXT_REX_SSE_REG_P (operands[0]))
5440 {
5441 if (TARGET_AVX512VL)
5442 return "vpxord\t%x0, %x0, %x0";
5443 else
5444 return "vpxord\t%g0, %g0, %g0";
5445 }
5446 return "vpxor\t%x0, %x0, %x0";
5447
5448 case MODE_V2DF:
5449 if (!EXT_REX_SSE_REG_P (operands[0]))
5450 return "%vxorpd\t%0, %d0";
5451 /* FALLTHRU */
5452 case MODE_V8DF:
5453 case MODE_V4DF:
5454 if (EXT_REX_SSE_REG_P (operands[0]))
5455 {
5456 if (TARGET_AVX512DQ)
5457 {
5458 if (TARGET_AVX512VL)
5459 return "vxorpd\t%x0, %x0, %x0";
5460 else
5461 return "vxorpd\t%g0, %g0, %g0";
5462 }
5463 else
5464 {
5465 if (TARGET_AVX512VL)
5466 return "vpxorq\t%x0, %x0, %x0";
5467 else
5468 return "vpxorq\t%g0, %g0, %g0";
5469 }
5470 }
5471 return "vxorpd\t%x0, %x0, %x0";
5472
5473 case MODE_V4SF:
5474 if (!EXT_REX_SSE_REG_P (operands[0]))
5475 return "%vxorps\t%0, %d0";
5476 /* FALLTHRU */
5477 case MODE_V16SF:
5478 case MODE_V8SF:
5479 if (EXT_REX_SSE_REG_P (operands[0]))
5480 {
5481 if (TARGET_AVX512DQ)
5482 {
5483 if (TARGET_AVX512VL)
5484 return "vxorps\t%x0, %x0, %x0";
5485 else
5486 return "vxorps\t%g0, %g0, %g0";
5487 }
5488 else
5489 {
5490 if (TARGET_AVX512VL)
5491 return "vpxord\t%x0, %x0, %x0";
5492 else
5493 return "vpxord\t%g0, %g0, %g0";
5494 }
5495 }
5496 return "vxorps\t%x0, %x0, %x0";
5497
5498 default:
5499 gcc_unreachable ();
5500 }
5501 }
5502 else if (x == constm1_rtx
5503 || vector_all_ones_operand (x, mode)
5504 || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5505 && float_vector_all_ones_operand (x, mode)))
5506 {
5507 enum attr_mode insn_mode = get_attr_mode (insn);
5508
5509 switch (insn_mode)
5510 {
5511 case MODE_XI:
5512 case MODE_V8DF:
5513 case MODE_V16SF:
5514 gcc_assert (TARGET_AVX512F);
5515 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5516
5517 case MODE_OI:
5518 case MODE_V4DF:
5519 case MODE_V8SF:
5520 gcc_assert (TARGET_AVX2);
5521 /* FALLTHRU */
5522 case MODE_TI:
5523 case MODE_V2DF:
5524 case MODE_V4SF:
5525 gcc_assert (TARGET_SSE2);
5526 if (EXT_REX_SSE_REG_P (operands[0]))
5527 {
5528 if (TARGET_AVX512VL)
5529 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
5530 else
5531 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5532 }
5533 return (TARGET_AVX
5534 ? "vpcmpeqd\t%0, %0, %0"
5535 : "pcmpeqd\t%0, %0");
5536
5537 default:
5538 gcc_unreachable ();
5539 }
5540 }
5541 else if (vector_all_ones_zero_extend_half_operand (x, mode))
5542 {
5543 if (GET_MODE_SIZE (mode) == 64)
5544 {
5545 gcc_assert (TARGET_AVX512F);
5546 return "vpcmpeqd\t%t0, %t0, %t0";
5547 }
5548 else if (GET_MODE_SIZE (mode) == 32)
5549 {
5550 gcc_assert (TARGET_AVX);
5551 return "vpcmpeqd\t%x0, %x0, %x0";
5552 }
5553 gcc_unreachable ();
5554 }
5555 else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
5556 {
5557 gcc_assert (TARGET_AVX512F);
5558 return "vpcmpeqd\t%x0, %x0, %x0";
5559 }
5560
5561 gcc_unreachable ();
5562}
5563
5564/* Returns true if INSN can be transformed from a memory load
5565 to a supported FP constant load. */
5566
5567bool
5568ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
5569{
5570 rtx src = find_constant_src (insn);
5571
5572 gcc_assert (REG_P (dst));
5573
5574 if (src == NULL
5575 || (SSE_REGNO_P (REGNO (dst))
5576	  && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
5577 || (!TARGET_AVX512VL
5578 && EXT_REX_SSE_REGNO_P (REGNO (dst))
5579	  && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
5580 || (STACK_REGNO_P (REGNO (dst))
5581	  && standard_80387_constant_p (src) < 1)
5582 return false;
5583
5584 return true;
5585}
5586
5587/* Predicate for pre-reload splitters with associated instructions,
5588 which can match any time before the split1 pass (usually combine),
5589 then are unconditionally split in that pass and should not be
5590 matched again afterwards. */
5591
5592bool
5593ix86_pre_reload_split (void)
5594{
5595 return (can_create_pseudo_p ()
5596 && !(cfun->curr_properties & PROP_rtl_split_insns));
5597}
5598
5599/* Return the opcode of the TYPE_SSEMOV instruction. To move from
5600 or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5601   TARGET_AVX512VL or it must be a register-to-register move that can
5602   be done with a zmm register move. */
5603
5604static const char *
5605ix86_get_ssemov (rtx *operands, unsigned size,
5606 enum attr_mode insn_mode, machine_mode mode)
5607{
5608 char buf[128];
5609 bool misaligned_p = (misaligned_operand (operands[0], mode)
5610 || misaligned_operand (operands[1], mode));
5611 bool evex_reg_p = (size == 64
5612 || EXT_REX_SSE_REG_P (operands[0])
5613 || EXT_REX_SSE_REG_P (operands[1]));
5614
5615 bool egpr_p = (TARGET_APX_EGPR
5616 && (x86_extended_rex2reg_mentioned_p (operands[0])
5617 || x86_extended_rex2reg_mentioned_p (operands[1])));
5618 bool egpr_vl = egpr_p && TARGET_AVX512VL;
5619
5620 machine_mode scalar_mode;
5621
5622 const char *opcode = NULL;
5623 enum
5624 {
5625 opcode_int,
5626 opcode_float,
5627 opcode_double
5628 } type = opcode_int;
5629
5630 switch (insn_mode)
5631 {
5632 case MODE_V16SF:
5633 case MODE_V8SF:
5634 case MODE_V4SF:
5635 scalar_mode = E_SFmode;
5636 type = opcode_float;
5637 break;
5638 case MODE_V8DF:
5639 case MODE_V4DF:
5640 case MODE_V2DF:
5641 scalar_mode = E_DFmode;
5642 type = opcode_double;
5643 break;
5644 case MODE_XI:
5645 case MODE_OI:
5646 case MODE_TI:
5647 scalar_mode = GET_MODE_INNER (mode);
5648 break;
5649 default:
5650 gcc_unreachable ();
5651 }
5652
  /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
     we can only use a zmm register-to-register move, i.e. no memory
     operand is allowed.  */
5655 if (evex_reg_p
5656 && !TARGET_AVX512VL
5657 && GET_MODE_SIZE (mode) < 64)
5658 {
5659 /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
5660 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
5661 AVX512VL is disabled, LRA can still generate reg to
5662 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
5663 modes. */
5664 if (memory_operand (operands[0], mode)
5665 || memory_operand (operands[1], mode))
5666 gcc_unreachable ();
5667 size = 64;
5668 switch (type)
5669 {
5670 case opcode_int:
5671 if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
5672 opcode = (misaligned_p
5673 ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
5674 : "vmovdqa64");
5675 else
5676 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5677 break;
5678 case opcode_float:
5679 opcode = misaligned_p ? "vmovups" : "vmovaps";
5680 break;
5681 case opcode_double:
5682 opcode = misaligned_p ? "vmovupd" : "vmovapd";
5683 break;
5684 }
5685 }
5686 else if (SCALAR_FLOAT_MODE_P (scalar_mode))
5687 {
5688 switch (scalar_mode)
5689 {
5690 case E_HFmode:
5691 case E_BFmode:
5692 if (evex_reg_p || egpr_vl)
5693 opcode = (misaligned_p
5694 ? (TARGET_AVX512BW
5695 ? "vmovdqu16"
5696 : "vmovdqu64")
5697 : "vmovdqa64");
5698 else if (egpr_p)
5699 opcode = (misaligned_p
5700 ? (TARGET_AVX512BW
5701 ? "vmovdqu16"
5702 : "%vmovups")
5703 : "%vmovaps");
5704 else
5705 opcode = (misaligned_p
5706 ? (TARGET_AVX512BW
5707 ? "vmovdqu16"
5708 : "%vmovdqu")
5709 : "%vmovdqa");
5710 break;
5711 case E_SFmode:
5712 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5713 break;
5714 case E_DFmode:
5715 opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5716 break;
5717 case E_TFmode:
5718 if (evex_reg_p || egpr_vl)
5719 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5720 else if (egpr_p)
5721 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5722 else
5723 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5724 break;
5725 default:
5726 gcc_unreachable ();
5727 }
5728 }
5729 else if (SCALAR_INT_MODE_P (scalar_mode))
5730 {
5731 switch (scalar_mode)
5732 {
5733 case E_QImode:
5734 if (evex_reg_p || egpr_vl)
5735 opcode = (misaligned_p
5736 ? (TARGET_AVX512BW
5737 ? "vmovdqu8"
5738 : "vmovdqu64")
5739 : "vmovdqa64");
5740 else if (egpr_p)
5741 opcode = (misaligned_p
5742 ? (TARGET_AVX512BW
5743 ? "vmovdqu8"
5744 : "%vmovups")
5745 : "%vmovaps");
5746 else
5747 opcode = (misaligned_p
5748 ? (TARGET_AVX512BW
5749 ? "vmovdqu8"
5750 : "%vmovdqu")
5751 : "%vmovdqa");
5752 break;
5753 case E_HImode:
5754 if (evex_reg_p || egpr_vl)
5755 opcode = (misaligned_p
5756 ? (TARGET_AVX512BW
5757 ? "vmovdqu16"
5758 : "vmovdqu64")
5759 : "vmovdqa64");
5760 else if (egpr_p)
5761 opcode = (misaligned_p
5762 ? (TARGET_AVX512BW
5763 ? "vmovdqu16"
5764 : "%vmovups")
5765 : "%vmovaps");
5766 else
5767 opcode = (misaligned_p
5768 ? (TARGET_AVX512BW
5769 ? "vmovdqu16"
5770 : "%vmovdqu")
5771 : "%vmovdqa");
5772 break;
5773 case E_SImode:
5774 if (evex_reg_p || egpr_vl)
5775 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5776 else if (egpr_p)
5777 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5778 else
5779 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5780 break;
5781 case E_DImode:
5782 case E_TImode:
5783 case E_OImode:
5784 if (evex_reg_p || egpr_vl)
5785 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5786 else if (egpr_p)
5787 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5788 else
5789 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5790 break;
5791 case E_XImode:
5792 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5793 break;
5794 default:
5795 gcc_unreachable ();
5796 }
5797 }
5798 else
5799 gcc_unreachable ();
5800
5801 switch (size)
5802 {
5803 case 64:
      snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5805 opcode);
5806 break;
5807 case 32:
      snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5809 opcode);
5810 break;
5811 case 16:
      snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5813 opcode);
5814 break;
5815 default:
5816 gcc_unreachable ();
5817 }
5818 output_asm_insn (buf, operands);
5819 return "";
5820}
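
/* Example output (register and operand choices illustrative): for a
   256-bit integer vector move between xmm16+/ymm16+ registers without
   AVX512VL, SIZE is forced to 64 above and something like
     vmovdqa32	%zmm17, %zmm16
   is emitted, while with AVX512VL and a misaligned memory source a
   256-bit form such as
     vmovdqu32	(%rax), %ymm16
   is used instead.  */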
5821
5822/* Return the template of the TYPE_SSEMOV instruction to move
5823 operands[1] into operands[0]. */
5824
5825const char *
5826ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5827{
5828 machine_mode mode = GET_MODE (operands[0]);
5829 if (get_attr_type (insn) != TYPE_SSEMOV
5830 || mode != GET_MODE (operands[1]))
5831 gcc_unreachable ();
5832
5833 enum attr_mode insn_mode = get_attr_mode (insn);
5834
5835 switch (insn_mode)
5836 {
5837 case MODE_XI:
5838 case MODE_V8DF:
5839 case MODE_V16SF:
      return ix86_get_ssemov (operands, 64, insn_mode, mode);
5841
5842 case MODE_OI:
5843 case MODE_V4DF:
5844 case MODE_V8SF:
      return ix86_get_ssemov (operands, 32, insn_mode, mode);
5846
5847 case MODE_TI:
5848 case MODE_V2DF:
5849 case MODE_V4SF:
      return ix86_get_ssemov (operands, 16, insn_mode, mode);
5851
5852 case MODE_DI:
5853 /* Handle broken assemblers that require movd instead of movq. */
5854 if (GENERAL_REG_P (operands[0]))
5855 {
5856 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5857 return "%vmovq\t{%1, %q0|%q0, %1}";
5858 else
5859 return "%vmovd\t{%1, %q0|%q0, %1}";
5860 }
5861 else if (GENERAL_REG_P (operands[1]))
5862 {
5863 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5864 return "%vmovq\t{%q1, %0|%0, %q1}";
5865 else
5866 return "%vmovd\t{%q1, %0|%0, %q1}";
5867 }
5868 else
5869 return "%vmovq\t{%1, %0|%0, %1}";
5870
5871 case MODE_SI:
5872 if (GENERAL_REG_P (operands[0]))
5873 return "%vmovd\t{%1, %k0|%k0, %1}";
5874 else if (GENERAL_REG_P (operands[1]))
5875 return "%vmovd\t{%k1, %0|%0, %k1}";
5876 else
5877 return "%vmovd\t{%1, %0|%0, %1}";
5878
5879 case MODE_HI:
5880 if (GENERAL_REG_P (operands[0]))
5881 return "vmovw\t{%1, %k0|%k0, %1}";
5882 else if (GENERAL_REG_P (operands[1]))
5883 return "vmovw\t{%k1, %0|%0, %k1}";
5884 else
5885 return "vmovw\t{%1, %0|%0, %1}";
5886
5887 case MODE_DF:
5888 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5889 return "vmovsd\t{%d1, %0|%0, %d1}";
5890 else
5891 return "%vmovsd\t{%1, %0|%0, %1}";
5892
5893 case MODE_SF:
5894 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5895 return "vmovss\t{%d1, %0|%0, %d1}";
5896 else
5897 return "%vmovss\t{%1, %0|%0, %1}";
5898
5899 case MODE_HF:
5900 case MODE_BF:
5901 if (REG_P (operands[0]) && REG_P (operands[1]))
5902 return "vmovsh\t{%d1, %0|%0, %d1}";
5903 else
5904 return "vmovsh\t{%1, %0|%0, %1}";
5905
5906 case MODE_V1DF:
5907 gcc_assert (!TARGET_AVX);
5908 return "movlpd\t{%1, %0|%0, %1}";
5909
5910 case MODE_V2SF:
5911 if (TARGET_AVX && REG_P (operands[0]))
5912 return "vmovlps\t{%1, %d0|%d0, %1}";
5913 else
5914 return "%vmovlps\t{%1, %0|%0, %1}";
5915
5916 default:
5917 gcc_unreachable ();
5918 }
5919}
5920
/* Return true if OP contains a symbol reference.  */
5922
5923bool
5924symbolic_reference_mentioned_p (rtx op)
5925{
5926 const char *fmt;
5927 int i;
5928
5929 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5930 return true;
5931
5932 fmt = GET_RTX_FORMAT (GET_CODE (op));
5933 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5934 {
5935 if (fmt[i] == 'E')
5936 {
5937 int j;
5938
5939 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5940 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5941 return true;
5942 }
5943
5944 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5945 return true;
5946 }
5947
5948 return false;
5949}
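
/* For example, this returns true for (plus:SI (symbol_ref:SI ("foo"))
   (const_int 4)) and false for (plus:SI (reg:SI 0) (const_int 4)).
   (Illustrative RTL only.)  */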
5950
5951/* Return true if it is appropriate to emit `ret' instructions in the
5952 body of a function. Do this only if the epilogue is simple, needing a
5953 couple of insns. Prior to reloading, we can't tell how many registers
5954 must be saved, so return false then. Return false if there is no frame
5955 marker to de-allocate. */
5956
5957bool
5958ix86_can_use_return_insn_p (void)
5959{
  if (ix86_function_ms_hook_prologue (current_function_decl))
5961 return false;
5962
  if (ix86_function_naked (current_function_decl))
5964 return false;
5965
5966 /* Don't use `ret' instruction in interrupt handler. */
5967 if (! reload_completed
5968 || frame_pointer_needed
5969 || cfun->machine->func_type != TYPE_NORMAL)
    return false;
5971
5972 /* Don't allow more than 32k pop, since that's all we can do
5973 with one instruction. */
5974 if (crtl->args.pops_args && crtl->args.size >= 32768)
    return false;
5976
5977 struct ix86_frame &frame = cfun->machine->frame;
5978 return (frame.stack_pointer_offset == UNITS_PER_WORD
5979 && (frame.nregs + frame.nsseregs) == 0);
5980}
5981
5982/* Return stack frame size. get_frame_size () returns used stack slots
5983 during compilation, which may be optimized out later. If stack frame
5984 is needed, stack_frame_required should be true. */
5985
5986static HOST_WIDE_INT
5987ix86_get_frame_size (void)
5988{
5989 if (cfun->machine->stack_frame_required)
5990 return get_frame_size ();
5991 else
5992 return 0;
5993}
5994
5995/* Value should be nonzero if functions must have frame pointers.
5996 Zero means the frame pointer need not be set up (and parms may
5997 be accessed via the stack pointer) in functions that seem suitable. */
5998
5999static bool
6000ix86_frame_pointer_required (void)
6001{
6002 /* If we accessed previous frames, then the generated code expects
6003 to be able to access the saved ebp value in our frame. */
6004 if (cfun->machine->accesses_prev_frame)
6005 return true;
6006
  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
6009 if (SUBTARGET_FRAME_POINTER_REQUIRED)
6010 return true;
6011
6012 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
6013 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
6014 return true;
6015
  /* Win64 SEH: very large frames need a frame pointer, as the maximum
     stack allocation is 4GB.  */
6018 if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
6019 return true;
6020
  /* SSE register saves require a frame pointer when the stack is misaligned.  */
6022 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
6023 return true;
6024
6025 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
6026 turns off the frame pointer by default. Turn it back on now if
6027 we've not got a leaf function. */
6028 if (TARGET_OMIT_LEAF_FRAME_POINTER
6029 && (!crtl->is_leaf
6030 || ix86_current_function_calls_tls_descriptor))
6031 return true;
6032
  /* Several versions of mcount for x86 assume that there is a
     frame, so we cannot allow profiling without a frame pointer.  */
6035 if (crtl->profile && !flag_fentry)
6036 return true;
6037
6038 return false;
6039}
6040
6041/* Record that the current function accesses previous call frames. */
6042
6043void
6044ix86_setup_frame_addresses (void)
6045{
6046 cfun->machine->accesses_prev_frame = 1;
6047}
6048
6049#if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
6050# define USE_HIDDEN_LINKONCE 1
6051#else
6052# define USE_HIDDEN_LINKONCE 0
6053#endif
6054
6055/* Label count for call and return thunks. It is used to make unique
6056 labels in call and return thunks. */
6057static int indirectlabelno;
6058
6059/* True if call thunk function is needed. */
6060static bool indirect_thunk_needed = false;
6061
6062/* Bit masks of integer registers, which contain branch target, used
6063 by call thunk functions. */
6064static HARD_REG_SET indirect_thunks_used;
6065
6066/* True if return thunk function is needed. */
6067static bool indirect_return_needed = false;
6068
6069/* True if return thunk function via CX is needed. */
6070static bool indirect_return_via_cx;
6071
6072#ifndef INDIRECT_LABEL
6073# define INDIRECT_LABEL "LIND"
6074#endif
6075
6076/* Indicate what prefix is needed for an indirect branch. */
6077enum indirect_thunk_prefix
6078{
6079 indirect_thunk_prefix_none,
6080 indirect_thunk_prefix_nt
6081};
6082
6083/* Return the prefix needed for an indirect branch INSN. */
6084
6085enum indirect_thunk_prefix
6086indirect_thunk_need_prefix (rtx_insn *insn)
6087{
6088 enum indirect_thunk_prefix need_prefix;
6089 if ((cfun->machine->indirect_branch_type
6090 == indirect_branch_thunk_extern)
6091 && ix86_notrack_prefixed_insn_p (insn))
6092 {
6093 /* NOTRACK prefix is only used with external thunk so that it
6094 can be properly updated to support CET at run-time. */
6095 need_prefix = indirect_thunk_prefix_nt;
6096 }
6097 else
6098 need_prefix = indirect_thunk_prefix_none;
6099 return need_prefix;
6100}
6101
6102/* Fills in the label name that should be used for the indirect thunk. */
6103
6104static void
6105indirect_thunk_name (char name[32], unsigned int regno,
6106 enum indirect_thunk_prefix need_prefix,
6107 bool ret_p)
6108{
6109 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
6110 gcc_unreachable ();
6111
6112 if (USE_HIDDEN_LINKONCE)
6113 {
6114 const char *prefix;
6115
6116 if (need_prefix == indirect_thunk_prefix_nt
6117 && regno != INVALID_REGNUM)
6118 {
6119 /* NOTRACK prefix is only used with external thunk via
6120 register so that NOTRACK prefix can be added to indirect
6121 branch via register to support CET at run-time. */
6122 prefix = "_nt";
6123 }
6124 else
6125 prefix = "";
6126
6127 const char *ret = ret_p ? "return" : "indirect";
6128
6129 if (regno != INVALID_REGNUM)
6130 {
6131 const char *reg_prefix;
6132 if (LEGACY_INT_REGNO_P (regno))
6133 reg_prefix = TARGET_64BIT ? "r" : "e";
6134 else
6135 reg_prefix = "";
	  sprintf (name, "__x86_%s_thunk%s_%s%s",
6137 ret, prefix, reg_prefix, reg_names[regno]);
6138 }
6139 else
	sprintf (name, "__x86_%s_thunk%s", ret, prefix);
6141 }
6142 else
6143 {
6144 if (regno != INVALID_REGNUM)
6145 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
6146 else
6147 {
6148 if (ret_p)
6149 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
6150 else
6151 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
6152 }
6153 }
6154}
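
/* With USE_HIDDEN_LINKONCE this produces names such as
   "__x86_indirect_thunk" (function address on the stack),
   "__x86_indirect_thunk_rax" (branch target in %rax, 64-bit),
   "__x86_indirect_thunk_nt_rax" (NOTRACK variant) and
   "__x86_return_thunk" for the return thunk.  (Examples only.)  */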
6155
/* Output a call and return thunk for indirect branch.  If REGNO !=
   INVALID_REGNUM, the function address is in REGNO and the call and
   return thunk looks like:
6158
6159 call L2
6160 L1:
6161 pause
6162 lfence
6163 jmp L1
6164 L2:
6165 mov %REG, (%sp)
6166 ret
6167
6168 Otherwise, the function address is on the top of stack and the
6169 call and return thunk looks like:
6170
6171 call L2
6172 L1:
6173 pause
6174 lfence
6175 jmp L1
6176 L2:
6177 lea WORD_SIZE(%sp), %sp
6178 ret
6179 */
6180
6181static void
6182output_indirect_thunk (unsigned int regno)
6183{
6184 char indirectlabel1[32];
6185 char indirectlabel2[32];
6186
6187 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
6188 indirectlabelno++);
6189 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
6190 indirectlabelno++);
6191
6192 /* Call */
  fputs ("\tcall\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel2);
  fputc ('\n', asm_out_file);
6196
6197 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
6198
  /* AMD and Intel CPUs each prefer a different instruction as the loop
     filler; emitting both pause + lfence is a compromise.  */
  fprintf (asm_out_file, "\tpause\n\tlfence\n");
6202
6203 /* Jump. */
  fputs ("\tjmp\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel1);
  fputc ('\n', asm_out_file);
6207
6208 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
6209
6210 /* The above call insn pushed a word to stack. Adjust CFI info. */
6211 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
6212 {
6213 if (! dwarf2out_do_cfi_asm ())
6214 {
6215 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6216 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
6217 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
	  vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
6219 }
6220 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6221 xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
6222 xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
      vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
      dwarf2out_emit_cfi (xcfi);
6225 }
6226
6227 if (regno != INVALID_REGNUM)
6228 {
6229 /* MOV. */
6230 rtx xops[2];
6231 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
6232 xops[1] = gen_rtx_REG (word_mode, regno);
6233 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
6234 }
6235 else
6236 {
6237 /* LEA. */
6238 rtx xops[2];
6239 xops[0] = stack_pointer_rtx;
6240 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
6241 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
6242 }
6243
  fputs ("\tret\n", asm_out_file);
  if ((ix86_harden_sls & harden_sls_return))
    fputs ("\tint3\n", asm_out_file);
6247}
6248
/* Output a function with a call and return thunk for indirect branch.
6250 If REGNO != INVALID_REGNUM, the function address is in REGNO.
6251 Otherwise, the function address is on the top of stack. Thunk is
6252 used for function return if RET_P is true. */
6253
6254static void
6255output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
6256 unsigned int regno, bool ret_p)
6257{
6258 char name[32];
6259 tree decl;
6260
6261 /* Create __x86_indirect_thunk. */
6262 indirect_thunk_name (name, regno, need_prefix, ret_p);
6263 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6264 get_identifier (name),
6265 build_function_type_list (void_type_node, NULL_TREE));
6266 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6267 NULL_TREE, void_type_node);
6268 TREE_PUBLIC (decl) = 1;
6269 TREE_STATIC (decl) = 1;
6270 DECL_IGNORED_P (decl) = 1;
6271
6272#if TARGET_MACHO
6273 if (TARGET_MACHO)
6274 {
6275 switch_to_section (darwin_sections[picbase_thunk_section]);
6276 fputs ("\t.weak_definition\t", asm_out_file);
6277 assemble_name (asm_out_file, name);
6278 fputs ("\n\t.private_extern\t", asm_out_file);
6279 assemble_name (asm_out_file, name);
6280 putc ('\n', asm_out_file);
6281 ASM_OUTPUT_LABEL (asm_out_file, name);
6282 DECL_WEAK (decl) = 1;
6283 }
6284 else
6285#endif
6286 if (USE_HIDDEN_LINKONCE)
6287 {
6288 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6289
6290 targetm.asm_out.unique_section (decl, 0);
6291 switch_to_section (get_named_section (decl, NULL, 0));
6292
6293 targetm.asm_out.globalize_label (asm_out_file, name);
      fputs ("\t.hidden\t", asm_out_file);
      assemble_name (asm_out_file, name);
      putc ('\n', asm_out_file);
6297 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6298 }
6299 else
6300 {
6301 switch_to_section (text_section);
6302 ASM_OUTPUT_LABEL (asm_out_file, name);
6303 }
6304
6305 DECL_INITIAL (decl) = make_node (BLOCK);
6306 current_function_decl = decl;
6307 allocate_struct_function (decl, false);
6308 init_function_start (decl);
6309 /* We're about to hide the function body from callees of final_* by
6310 emitting it directly; tell them we're a thunk, if they care. */
6311 cfun->is_thunk = true;
6312 first_function_block_is_cold = false;
6313 /* Make sure unwind info is emitted for the thunk if needed. */
6314 final_start_function (emit_barrier (), asm_out_file, 1);
6315
6316 output_indirect_thunk (regno);
6317
6318 final_end_function ();
6319 init_insn_lengths ();
6320 free_after_compilation (cfun);
6321 set_cfun (NULL);
6322 current_function_decl = NULL;
6323}
6324
6325static int pic_labels_used;
6326
6327/* Fills in the label name that should be used for a pc thunk for
6328 the given register. */
6329
6330static void
6331get_pc_thunk_name (char name[32], unsigned int regno)
6332{
6333 gcc_assert (!TARGET_64BIT);
6334
6335 if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
6337 else
6338 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6339}
6340
6341
/* This function runs at the end of compilation.  It emits any indirect
   branch/return thunks that are needed and, for -fpic, the thunks that
   load a register with the return address of the caller and return.  */
6344
6345static void
6346ix86_code_end (void)
6347{
6348 rtx xops[2];
6349 unsigned int regno;
6350
6351 if (indirect_return_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    INVALID_REGNUM, true);
  if (indirect_return_via_cx)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    CX_REG, true);
  if (indirect_thunk_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    INVALID_REGNUM, false);
6360
6361 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
6362 {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);
6366 }
6367
6368 for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
6369 {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);
6373 }
6374
6375 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
6376 {
6377 char name[32];
6378 tree decl;
6379
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);
6383
6384 if (!(pic_labels_used & (1 << regno)))
6385 continue;
6386
6387 get_pc_thunk_name (name, regno);
6388
6389 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6390 get_identifier (name),
6391 build_function_type_list (void_type_node, NULL_TREE));
6392 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6393 NULL_TREE, void_type_node);
6394 TREE_PUBLIC (decl) = 1;
6395 TREE_STATIC (decl) = 1;
6396 DECL_IGNORED_P (decl) = 1;
6397
6398#if TARGET_MACHO
6399 if (TARGET_MACHO)
6400 {
6401 switch_to_section (darwin_sections[picbase_thunk_section]);
6402 fputs ("\t.weak_definition\t", asm_out_file);
6403 assemble_name (asm_out_file, name);
6404 fputs ("\n\t.private_extern\t", asm_out_file);
6405 assemble_name (asm_out_file, name);
6406 putc ('\n', asm_out_file);
6407 ASM_OUTPUT_LABEL (asm_out_file, name);
6408 DECL_WEAK (decl) = 1;
6409 }
6410 else
6411#endif
6412 if (USE_HIDDEN_LINKONCE)
6413 {
6414 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6415
6416 targetm.asm_out.unique_section (decl, 0);
6417 switch_to_section (get_named_section (decl, NULL, 0));
6418
6419 targetm.asm_out.globalize_label (asm_out_file, name);
	fputs ("\t.hidden\t", asm_out_file);
	assemble_name (asm_out_file, name);
	putc ('\n', asm_out_file);
6423 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6424 }
6425 else
6426 {
6427 switch_to_section (text_section);
6428 ASM_OUTPUT_LABEL (asm_out_file, name);
6429 }
6430
6431 DECL_INITIAL (decl) = make_node (BLOCK);
6432 current_function_decl = decl;
6433 allocate_struct_function (decl, false);
6434 init_function_start (decl);
6435 /* We're about to hide the function body from callees of final_* by
6436 emitting it directly; tell them we're a thunk, if they care. */
6437 cfun->is_thunk = true;
6438 first_function_block_is_cold = false;
6439 /* Make sure unwind info is emitted for the thunk if needed. */
6440 final_start_function (emit_barrier (), asm_out_file, 1);
6441
6442 /* Pad stack IP move with 4 instructions (two NOPs count
6443 as one instruction). */
6444 if (TARGET_PAD_SHORT_FUNCTION)
6445 {
6446 int i = 8;
6447
6448 while (i--)
	    fputs ("\tnop\n", asm_out_file);
6450 }
6451
6452 xops[0] = gen_rtx_REG (Pmode, regno);
6453 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6454 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
6456 final_end_function ();
6457 init_insn_lengths ();
6458 free_after_compilation (cfun);
6459 set_cfun (NULL);
6460 current_function_decl = NULL;
6461 }
6462
6463 if (flag_split_stack)
6464 file_end_indicate_split_stack ();
6465}
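
/* For reference, a 32-bit PIC thunk emitted above for %ebx looks roughly
   like:

     __x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret

   i.e. it simply loads the caller's return address into the destination
   register.  */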
6466
6467/* Emit code for the SET_GOT patterns. */
6468
6469const char *
6470output_set_got (rtx dest, rtx label)
6471{
6472 rtx xops[3];
6473
6474 xops[0] = dest;
6475
6476 if (TARGET_VXWORKS_RTP && flag_pic)
6477 {
6478 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6479 xops[2] = gen_rtx_MEM (Pmode,
6480 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6481 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6482
6483 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6484 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6485 an unadorned address. */
6486 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6487 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6488 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6489 return "";
6490 }
6491
6492 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6493
6494 if (flag_pic)
6495 {
6496 char name[32];
6497 get_pc_thunk_name (name, REGNO (dest));
6498 pic_labels_used |= 1 << REGNO (dest);
6499
6500 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6501 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6502 output_asm_insn ("%!call\t%X2", xops);
6503
6504#if TARGET_MACHO
6505 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
6506 This is what will be referenced by the Mach-O PIC subsystem. */
6507 if (machopic_should_output_picbase_label () || !label)
6508 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
6509
6510 /* When we are restoring the pic base at the site of a nonlocal label,
6511 and we decided to emit the pic base above, we will still output a
6512 local label used for calculating the correction offset (even though
6513 the offset will be 0 in that case). */
6514 if (label)
6515 targetm.asm_out.internal_label (asm_out_file, "L",
6516 CODE_LABEL_NUMBER (label));
6517#endif
6518 }
6519 else
6520 {
6521 if (TARGET_MACHO)
6522 /* We don't need a pic base, we're not producing pic. */
6523 gcc_unreachable ();
6524
6525 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6526 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6527 targetm.asm_out.internal_label (asm_out_file, "L",
6528 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6529 }
6530
6531 if (!TARGET_MACHO)
6532 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6533
6534 return "";
6535}
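
/* A typical 32-bit PIC sequence produced by the above (AT&T syntax,
   register choice illustrative) is:

     call	__x86.get_pc_thunk.bx
     addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk loads the address of the add into %ebx and the add
   then materializes the GOT pointer.  */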
6536
/* Generate a "push" pattern for input ARG.  */
6538
6539rtx
6540gen_push (rtx arg, bool ppx_p)
6541{
6542 struct machine_function *m = cfun->machine;
6543
6544 if (m->fs.cfa_reg == stack_pointer_rtx)
6545 m->fs.cfa_offset += UNITS_PER_WORD;
6546 m->fs.sp_offset += UNITS_PER_WORD;
6547
6548 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6549 arg = gen_rtx_REG (word_mode, REGNO (arg));
6550
6551 rtx stack = gen_rtx_MEM (word_mode,
6552 gen_rtx_PRE_DEC (Pmode,
6553 stack_pointer_rtx));
6554 return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg);
6555}
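
/* For example, on x86-64 gen_push of a DImode register yields RTL of the
   form (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI ...)), matching the
   push insn pattern; the frame-state bookkeeping above already accounts
   for the UNITS_PER_WORD stack adjustment.  (Illustrative.)  */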
6556
6557rtx
6558gen_pushfl (void)
6559{
6560 struct machine_function *m = cfun->machine;
6561 rtx flags, mem;
6562
6563 if (m->fs.cfa_reg == stack_pointer_rtx)
6564 m->fs.cfa_offset += UNITS_PER_WORD;
6565 m->fs.sp_offset += UNITS_PER_WORD;
6566
6567 flags = gen_rtx_REG (CCmode, FLAGS_REG);
6568
6569 mem = gen_rtx_MEM (word_mode,
6570 gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
6571
  return gen_pushfl2 (word_mode, mem, flags);
6573}
6574
/* Generate a "pop" pattern for input ARG.  */
6576
6577rtx
6578gen_pop (rtx arg, bool ppx_p)
6579{
6580 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6581 arg = gen_rtx_REG (word_mode, REGNO (arg));
6582
6583 rtx stack = gen_rtx_MEM (word_mode,
6584 gen_rtx_POST_INC (Pmode,
6585 stack_pointer_rtx));
6586
6587 return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack);
6588}
6589
6590rtx
6591gen_popfl (void)
6592{
6593 rtx flags, mem;
6594
6595 flags = gen_rtx_REG (CCmode, FLAGS_REG);
6596
6597 mem = gen_rtx_MEM (word_mode,
6598 gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
6599
  return gen_popfl1 (word_mode, flags, mem);
6601}
6602
/* Generate a "push2" pattern pushing REG1 and REG2 into MEM.  */
6604rtx
6605gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
6606{
6607 struct machine_function *m = cfun->machine;
6608 const int offset = UNITS_PER_WORD * 2;
6609
6610 if (m->fs.cfa_reg == stack_pointer_rtx)
6611 m->fs.cfa_offset += offset;
6612 m->fs.sp_offset += offset;
6613
6614 if (REG_P (reg1) && GET_MODE (reg1) != word_mode)
6615 reg1 = gen_rtx_REG (word_mode, REGNO (reg1));
6616
6617 if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
6618 reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
6619
6620 return ppx_p ? gen_push2p_di (mem, reg1, reg2)
6621 : gen_push2_di (mem, reg1, reg2);
6622}
6623
/* Return the number of an unused call-clobbered register if one is
   available for the entire function, otherwise return INVALID_REGNUM.  */
6626
6627static unsigned int
6628ix86_select_alt_pic_regnum (void)
6629{
6630 if (ix86_use_pseudo_pic_reg ())
6631 return INVALID_REGNUM;
6632
6633 if (crtl->is_leaf
6634 && !crtl->profile
6635 && !ix86_current_function_calls_tls_descriptor)
6636 {
6637 int i, drap;
6638 /* Can't use the same register for both PIC and DRAP. */
6639 if (crtl->drap_reg)
6640 drap = REGNO (crtl->drap_reg);
6641 else
6642 drap = -1;
6643 for (i = 2; i >= 0; --i)
6644 if (i != drap && !df_regs_ever_live_p (i))
6645 return i;
6646 }
6647
6648 return INVALID_REGNUM;
6649}
6650
6651/* Return true if REGNO is used by the epilogue. */
6652
6653bool
6654ix86_epilogue_uses (int regno)
6655{
6656 /* If there are no caller-saved registers, we preserve all registers,
6657 except for MMX and x87 registers which aren't supported when saving
6658 and restoring registers. Don't explicitly save SP register since
6659 it is always preserved. */
6660 return (epilogue_completed
6661 && (cfun->machine->call_saved_registers
6662 == TYPE_NO_CALLER_SAVED_REGISTERS)
6663 && !fixed_regs[regno]
6664 && !STACK_REGNO_P (regno)
6665 && !MMX_REGNO_P (regno));
6666}
6667
6668/* Return nonzero if register REGNO can be used as a scratch register
6669 in peephole2. */
6670
6671static bool
6672ix86_hard_regno_scratch_ok (unsigned int regno)
6673{
  /* If there are no caller-saved registers, we can't use any register
     as a scratch register after the epilogue, and we use REGNO as a
     scratch register only if it has already been used, to avoid saving
     and restoring it.  */
6678 return ((cfun->machine->call_saved_registers
6679 != TYPE_NO_CALLER_SAVED_REGISTERS)
6680 || (!epilogue_completed
6681 && df_regs_ever_live_p (regno)));
6682}
6683
6684/* Return TRUE if we need to save REGNO. */
6685
6686bool
6687ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
6688{
6689 rtx reg;
6690
6691 switch (cfun->machine->call_saved_registers)
6692 {
6693 case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
6694 break;
6695
6696 case TYPE_NO_CALLER_SAVED_REGISTERS:
6697 /* If there are no caller-saved registers, we preserve all
6698 registers, except for MMX and x87 registers which aren't
6699 supported when saving and restoring registers. Don't
6700 explicitly save SP register since it is always preserved.
6701
6702 Don't preserve registers used for function return value. */
6703 reg = crtl->return_rtx;
6704 if (reg)
6705 {
6706 unsigned int i = REGNO (reg);
6707 unsigned int nregs = REG_NREGS (reg);
6708 while (nregs-- > 0)
6709 if ((i + nregs) == regno)
6710 return false;
6711 }
6712
6713 return (df_regs_ever_live_p (regno)
6714 && !fixed_regs[regno]
6715 && !STACK_REGNO_P (regno)
6716 && !MMX_REGNO_P (regno)
6717 && (regno != HARD_FRAME_POINTER_REGNUM
6718 || !frame_pointer_needed));
6719
6720 case TYPE_NO_CALLEE_SAVED_REGISTERS:
6721 return false;
6722
6723 case TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP:
6724 if (regno != HARD_FRAME_POINTER_REGNUM)
6725 return false;
6726 break;
6727 }
6728
6729 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
6730 && pic_offset_table_rtx)
6731 {
6732 if (ix86_use_pseudo_pic_reg ())
6733 {
6734 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
6735 _mcount in prologue. */
6736 if (!TARGET_64BIT && flag_pic && crtl->profile)
6737 return true;
6738 }
6739 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6740 || crtl->profile
6741 || crtl->calls_eh_return
6742 || crtl->uses_const_pool
6743 || cfun->has_nonlocal_label)
6744 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
6745 }
6746
6747 if (crtl->calls_eh_return && maybe_eh_return)
6748 {
6749 unsigned i;
6750 for (i = 0; ; i++)
6751 {
6752 unsigned test = EH_RETURN_DATA_REGNO (i);
6753 if (test == INVALID_REGNUM)
6754 break;
6755 if (test == regno)
6756 return true;
6757 }
6758 }
6759
6760 if (ignore_outlined && cfun->machine->call_ms2sysv)
6761 {
6762 unsigned count = cfun->machine->call_ms2sysv_extra_regs
6763 + xlogue_layout::MIN_REGS;
6764 if (xlogue_layout::is_stub_managed_reg (regno, count))
6765 return false;
6766 }
6767
6768 if (crtl->drap_reg
6769 && regno == REGNO (crtl->drap_reg)
6770 && !cfun->machine->no_drap_save_restore)
6771 return true;
6772
6773 return (df_regs_ever_live_p (regno)
6774 && !call_used_or_fixed_reg_p (regno)
6775 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6776}
6777
/* Return the number of saved general purpose registers.  */
6779
6780static int
6781ix86_nsaved_regs (void)
6782{
6783 int nregs = 0;
6784 int regno;
6785
6786 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6788 nregs ++;
6789 return nregs;
6790}
6791
6792/* Return number of saved SSE registers. */
6793
6794static int
6795ix86_nsaved_sseregs (void)
6796{
6797 int nregs = 0;
6798 int regno;
6799
6800 if (!TARGET_64BIT_MS_ABI)
6801 return 0;
6802 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6804 nregs ++;
6805 return nregs;
6806}
6807
6808/* Given FROM and TO register numbers, say whether this elimination is
6809 allowed. If stack alignment is needed, we can only replace argument
6810 pointer with hard frame pointer, or replace frame pointer with stack
6811 pointer. Otherwise, frame pointer elimination is automatically
6812 handled and all other eliminations are valid. */
6813
6814static bool
6815ix86_can_eliminate (const int from, const int to)
6816{
6817 if (stack_realign_fp)
6818 return ((from == ARG_POINTER_REGNUM
6819 && to == HARD_FRAME_POINTER_REGNUM)
6820 || (from == FRAME_POINTER_REGNUM
6821 && to == STACK_POINTER_REGNUM));
6822 else
6823 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
6824}
6825
6826/* Return the offset between two registers, one to be eliminated, and the other
6827 its replacement, at the start of a routine. */
6828
6829HOST_WIDE_INT
6830ix86_initial_elimination_offset (int from, int to)
6831{
6832 struct ix86_frame &frame = cfun->machine->frame;
6833
6834 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6835 return frame.hard_frame_pointer_offset;
6836 else if (from == FRAME_POINTER_REGNUM
6837 && to == HARD_FRAME_POINTER_REGNUM)
6838 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6839 else
6840 {
6841 gcc_assert (to == STACK_POINTER_REGNUM);
6842
6843 if (from == ARG_POINTER_REGNUM)
6844 return frame.stack_pointer_offset;
6845
6846 gcc_assert (from == FRAME_POINTER_REGNUM);
6847 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6848 }
6849}
6850
6851/* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6852void
6853warn_once_call_ms2sysv_xlogues (const char *feature)
6854{
6855 static bool warned_once = false;
6856 if (!warned_once)
6857 {
6858 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6859 feature);
6860 warned_once = true;
6861 }
6862}
6863
6864/* Return the probing interval for -fstack-clash-protection. */
6865
6866static HOST_WIDE_INT
6867get_probe_interval (void)
6868{
6869 if (flag_stack_clash_protection)
6870 return (HOST_WIDE_INT_1U
6871 << param_stack_clash_protection_probe_interval);
6872 else
6873 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
6874}
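
/* For example, with --param stack-clash-protection-probe-interval=12 the
   interval is HOST_WIDE_INT_1U << 12, i.e. a probe every 4 KiB.
   (Parameter value chosen only for illustration.)  */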
6875
6876/* When using -fsplit-stack, the allocation routines set a field in
6877 the TCB to the bottom of the stack plus this much space, measured
6878 in bytes. */
6879
6880#define SPLIT_STACK_AVAILABLE 256
6881
6882/* Return true if push2/pop2 can be generated. */
6883
6884static bool
6885ix86_can_use_push2pop2 (void)
6886{
6887 /* Use push2/pop2 only if the incoming stack is 16-byte aligned. */
6888 unsigned int incoming_stack_boundary
6889 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
6890 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
6891 return incoming_stack_boundary % 128 == 0;
6892}
6893
6894/* Helper function to determine whether push2/pop2 can be used in prologue or
6895 epilogue for register save/restore. */
6896static bool
6897ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
6898{
6899 if (!ix86_can_use_push2pop2 ())
6900 return false;
6901 int aligned = cfun->machine->fs.sp_offset % 16 == 0;
6902 return TARGET_APX_PUSH2POP2
6903 && !cfun->machine->frame.save_regs_using_mov
6904 && cfun->machine->func_type == TYPE_NORMAL
6905 && (nregs + aligned) >= 3;
6906}
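
/* One reading of the (nregs + aligned) >= 3 heuristic above: push2 needs
   a 16-byte aligned slot, so registers are saved in pairs, with an odd
   register pushed singly first when the stack is not already aligned;
   requiring at least three "units" ensures that at least one push2 pair
   is actually formed.  (Interpretation, not taken from the original
   comments.)  */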
6907
6908/* Fill structure ix86_frame about frame of currently computed function. */
6909
6910static void
6911ix86_compute_frame_layout (void)
6912{
6913 struct ix86_frame *frame = &cfun->machine->frame;
6914 struct machine_function *m = cfun->machine;
6915 unsigned HOST_WIDE_INT stack_alignment_needed;
6916 HOST_WIDE_INT offset;
6917 unsigned HOST_WIDE_INT preferred_alignment;
6918 HOST_WIDE_INT size = ix86_get_frame_size ();
6919 HOST_WIDE_INT to_allocate;
6920
6921 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6922 * ms_abi functions that call a sysv function. We now need to prune away
6923 * cases where it should be disabled. */
6924 if (TARGET_64BIT && m->call_ms2sysv)
6925 {
6926 gcc_assert (TARGET_64BIT_MS_ABI);
6927 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
6928 gcc_assert (!TARGET_SEH);
6929 gcc_assert (TARGET_SSE);
6930 gcc_assert (!ix86_using_red_zone ());
6931
6932 if (crtl->calls_eh_return)
6933 {
6934 gcc_assert (!reload_completed);
6935 m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6937 }
6938
6939 else if (ix86_static_chain_on_stack)
6940 {
6941 gcc_assert (!reload_completed);
6942 m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues ("static call chains");
6944 }
6945
6946 /* Finally, compute which registers the stub will manage. */
6947 else
6948 {
6949 unsigned count = xlogue_layout::count_stub_managed_regs ();
6950 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
6951 m->call_ms2sysv_pad_in = 0;
6952 }
6953 }
6954
6955 frame->nregs = ix86_nsaved_regs ();
6956 frame->nsseregs = ix86_nsaved_sseregs ();
6957
  /* The 64-bit MS ABI seems to require the stack alignment to always
     be 16, except for function prologues, leaf functions and when the
     default incoming stack boundary is overridden at the command line
     or via the force_align_arg_pointer attribute.

     Darwin's ABI specifies 128-bit alignment for both 32- and 64-bit
     variants at call sites, including profile function calls.

     For APX push2/pop2, the stack also requires 128-bit alignment.  */
  if ((ix86_pro_and_epilogue_can_use_push2pop2 (frame->nregs)
6968 && crtl->preferred_stack_boundary < 128)
6969 || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
6970 && crtl->preferred_stack_boundary < 128)
6971 && (!crtl->is_leaf || cfun->calls_alloca != 0
6972 || ix86_current_function_calls_tls_descriptor
6973 || (TARGET_MACHO && crtl->profile)
6974 || ix86_incoming_stack_boundary < 128)))
6975 {
6976 crtl->preferred_stack_boundary = 128;
6977 if (crtl->stack_alignment_needed < 128)
6978 crtl->stack_alignment_needed = 128;
6979 }
6980
6981 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6982 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6983
6984 gcc_assert (!size || stack_alignment_needed);
6985 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6986 gcc_assert (preferred_alignment <= stack_alignment_needed);
6987
6988 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
6989 gcc_assert (TARGET_64BIT || !frame->nsseregs);
6990 if (TARGET_64BIT && m->call_ms2sysv)
6991 {
6992 gcc_assert (stack_alignment_needed >= 16);
6993 gcc_assert (!frame->nsseregs);
6994 }
6995
6996 /* For SEH we have to limit the amount of code movement into the prologue.
6997 At present we do this via a BLOCKAGE, at which point there's very little
6998 scheduling that can be done, which means that there's very little point
6999 in doing anything except PUSHs. */
7000 if (TARGET_SEH)
7001 m->use_fast_prologue_epilogue = false;
7002 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
7003 {
7004 int count = frame->nregs;
      struct cgraph_node *node = cgraph_node::get (current_function_decl);
7006
7007 /* The fast prologue uses move instead of push to save registers. This
7008 is significantly longer, but also executes faster as modern hardware
7009 can execute the moves in parallel, but can't do that for push/pop.
7010
	 Be careful about choosing which prologue to emit: when the function
	 takes many instructions to execute, we may use the slow version, as
	 we may when the function is known to be outside a hot spot (which is
	 known only with profile feedback).  Weight the size of the function
	 by the number of registers to save, as it is cheap to use one or two
	 push instructions but very slow to use many of them.
7017
7018 Calling this hook multiple times with the same frame requirements
7019 must produce the same layout, since the RA might otherwise be
7020 unable to reach a fixed point or might fail its final sanity checks.
7021 This means that once we've assumed that a function does or doesn't
7022 have a particular size, we have to stick to that assumption
7023 regardless of how the function has changed since. */
7024 if (count)
7025 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7026 if (node->frequency < NODE_FREQUENCY_NORMAL
7027 || (flag_branch_probabilities
7028 && node->frequency < NODE_FREQUENCY_HOT))
7029 m->use_fast_prologue_epilogue = false;
7030 else
7031 {
7032 if (count != frame->expensive_count)
7033 {
7034 frame->expensive_count = count;
7035 frame->expensive_p = expensive_function_p (count);
7036 }
7037 m->use_fast_prologue_epilogue = !frame->expensive_p;
7038 }
7039 }
7040
7041 frame->save_regs_using_mov
7042 = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
7043
7044 /* Skip return address and error code in exception handler. */
7045 offset = INCOMING_FRAME_SP_OFFSET;
7046
7047 /* Skip pushed static chain. */
7048 if (ix86_static_chain_on_stack)
7049 offset += UNITS_PER_WORD;
7050
7051 /* Skip saved base pointer. */
7052 if (frame_pointer_needed)
7053 offset += UNITS_PER_WORD;
7054 frame->hfp_save_offset = offset;
7055
7056 /* The traditional frame pointer location is at the top of the frame. */
7057 frame->hard_frame_pointer_offset = offset;
7058
7059 /* Register save area */
7060 offset += frame->nregs * UNITS_PER_WORD;
7061 frame->reg_save_offset = offset;
7062
7063 /* Calculate the size of the va-arg area (not including padding, if any). */
7064 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7065
7066 /* Also adjust stack_realign_offset for the largest alignment of
7067 stack slot actually used. */
7068 if (stack_realign_fp
7069 || (cfun->machine->max_used_stack_alignment != 0
7070 && (offset % cfun->machine->max_used_stack_alignment) != 0))
7071 {
7072 /* We may need a 16-byte aligned stack for the remainder of the
7073 register save area, but the stack frame for the local function
	 may require a greater alignment if using AVX, AVX2 or AVX-512.  In order
7075 to avoid wasting space, we first calculate the space needed for
7076 the rest of the register saves, add that to the stack pointer,
7077 and then realign the stack to the boundary of the start of the
7078 frame for the local function. */
7079 HOST_WIDE_INT space_needed = 0;
7080 HOST_WIDE_INT sse_reg_space_needed = 0;
7081
7082 if (TARGET_64BIT)
7083 {
7084 if (m->call_ms2sysv)
7085 {
7086 m->call_ms2sysv_pad_in = 0;
7087 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
7088 }
7089
7090 else if (frame->nsseregs)
7091 /* The only ABI that has saved SSE registers (Win64) also has a
7092 16-byte aligned default stack. However, many programs violate
7093 the ABI, and Wine64 forces stack realignment to compensate. */
7094 space_needed = frame->nsseregs * 16;
7095
7096 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
7097
	  /* 64-bit frame->va_arg_size should always be a multiple of 16,
	     but we round up to be pedantic.  */
7100 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
7101 }
7102 else
7103 space_needed = frame->va_arg_size;
7104
7105 /* Record the allocation size required prior to the realignment AND. */
7106 frame->stack_realign_allocate = space_needed;
7107
7108 /* The re-aligned stack starts at frame->stack_realign_offset. Values
7109 before this point are not directly comparable with values below
7110 this point. Use sp_valid_at to determine if the stack pointer is
7111 valid for a given offset, fp_valid_at for the frame pointer, or
7112 choose_baseaddr to have a base register chosen for you.
7113
7114 Note that the result of (frame->stack_realign_offset
7115 & (stack_alignment_needed - 1)) may not equal zero. */
7116 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
7117 frame->stack_realign_offset = offset - space_needed;
7118 frame->sse_reg_save_offset = frame->stack_realign_offset
7119 + sse_reg_space_needed;
7120 }
7121 else
7122 {
7123 frame->stack_realign_offset = offset;
7124
7125 if (TARGET_64BIT && m->call_ms2sysv)
7126 {
7127 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
7128 offset += xlogue_layout::get_instance ().get_stack_space_used ();
7129 }
7130
7131 /* Align and set SSE register save area. */
7132 else if (frame->nsseregs)
7133 {
7134 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
7135 required and the DRAP re-alignment boundary is at least 16 bytes,
7136 then we want the SSE register save area properly aligned. */
7137 if (ix86_incoming_stack_boundary >= 128
7138 || (stack_realign_drap && stack_alignment_needed >= 16))
7139 offset = ROUND_UP (offset, 16);
7140 offset += frame->nsseregs * 16;
7141 }
7142 frame->sse_reg_save_offset = offset;
7143 offset += frame->va_arg_size;
7144 }
7145
  /* Align the start of the frame for the local function.  When a
     function call is removed, the function may become a leaf function.
     But if arguments may be passed on the stack, we need to align the
     stack when there is no tail call.  */
7150 if (m->call_ms2sysv
7151 || frame->va_arg_size != 0
7152 || size != 0
7153 || !crtl->is_leaf
7154 || (!crtl->tail_call_emit
7155 && cfun->machine->outgoing_args_on_stack)
7156 || cfun->calls_alloca
7157 || ix86_current_function_calls_tls_descriptor)
7158 offset = ROUND_UP (offset, stack_alignment_needed);
7159
7160 /* Frame pointer points here. */
7161 frame->frame_pointer_offset = offset;
7162
7163 offset += size;
7164
  /* Add the outgoing arguments area.  This can be skipped if we
     eliminated all the function calls as dead code.
     Skipping is however impossible when the function calls alloca:
     the alloca expander assumes that the last crtl->outgoing_args_size
     bytes of the stack frame are unused.  */
7170 if (ACCUMULATE_OUTGOING_ARGS
7171 && (!crtl->is_leaf || cfun->calls_alloca
7172 || ix86_current_function_calls_tls_descriptor))
7173 {
7174 offset += crtl->outgoing_args_size;
7175 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7176 }
7177 else
7178 frame->outgoing_arguments_size = 0;
7179
7180 /* Align stack boundary. Only needed if we're calling another function
7181 or using alloca. */
7182 if (!crtl->is_leaf || cfun->calls_alloca
7183 || ix86_current_function_calls_tls_descriptor)
7184 offset = ROUND_UP (offset, preferred_alignment);
7185
7186 /* We've reached end of stack frame. */
7187 frame->stack_pointer_offset = offset;
7188
7189 /* Size prologue needs to allocate. */
7190 to_allocate = offset - frame->sse_reg_save_offset;
7191
7192 if ((!to_allocate && frame->nregs <= 1)
7193 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
7194 /* If static stack checking is enabled and done with probes,
7195 the registers need to be saved before allocating the frame. */
7196 || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
7197 /* If stack clash probing needs a loop, then it needs a
7198 scratch register. But the returned register is only guaranteed
7199 to be safe to use after register saves are complete. So if
7200 stack clash protections are enabled and the allocated frame is
7201 larger than the probe interval, then use pushes to save
7202 callee saved registers. */
7203 || (flag_stack_clash_protection
7204 && !ix86_target_stack_probe ()
7205 && to_allocate > get_probe_interval ()))
7206 frame->save_regs_using_mov = false;
7207
7208 if (ix86_using_red_zone ()
7209 && crtl->sp_is_unchanging
7210 && crtl->is_leaf
7211 && !cfun->machine->asm_redzone_clobber_seen
7212 && !ix86_pc_thunk_call_expanded
7213 && !ix86_current_function_calls_tls_descriptor)
7214 {
7215 frame->red_zone_size = to_allocate;
7216 if (frame->save_regs_using_mov)
7217 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7218 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7219 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7220 }
7221 else
7222 frame->red_zone_size = 0;
7223 frame->stack_pointer_offset -= frame->red_zone_size;
7224
7225 /* The SEH frame pointer location is near the bottom of the frame.
7226 This is enforced by the fact that the difference between the
7227 stack pointer and the frame pointer is limited to 240 bytes in
7228 the unwind data structure. */
7229 if (TARGET_SEH)
7230 {
7231 /* Force the frame pointer to point at or below the lowest register save
7232 area, see the SEH code in config/i386/winnt.cc for the rationale. */
7233 frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
7234
7235 /* If we can leave the frame pointer where it is, do so; however return
7236 the establisher frame for __builtin_frame_address (0) or else if the
7237 frame overflows the SEH maximum frame size.
7238
7239 Note that the value returned by __builtin_frame_address (0) is quite
7240 constrained, because setjmp is piggybacked on the SEH machinery with
7241 recent versions of MinGW:
7242
7243 # elif defined(__SEH__)
7244 # if defined(__aarch64__) || defined(_ARM64_)
7245 # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
7246 # elif (__MINGW_GCC_VERSION < 40702)
7247 # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
7248 # else
7249 # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
7250 # endif
7251
7252 and the second argument passed to _setjmp, if not null, is forwarded
7253 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
7254 built an ExceptionRecord on the fly describing the setjmp buffer). */
7255 const HOST_WIDE_INT diff
7256 = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
7257 if (diff <= 255 && !crtl->accesses_prior_frames)
7258 {
	  /* The resulting diff will be a multiple of 16 that is at most
	     240, as required by the unwind data structure.  */
7261 frame->hard_frame_pointer_offset += (diff & 15);
7262 }
7263 else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
7264 {
7265 /* Ideally we'd determine what portion of the local stack frame
7266 (within the constraint of the lowest 240) is most heavily used.
7267 But without that complication, simply bias the frame pointer
7268 by 128 bytes so as to maximize the amount of the local stack
7269 frame that is addressable with 8-bit offsets. */
7270 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
7271 }
7272 else
7273 frame->hard_frame_pointer_offset = frame->hfp_save_offset;
7274 }
7275}
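
/* Rough picture of the offsets computed above, growing away from the CFA:
   return address (and error code for exception handlers), optional pushed
   static chain, optional saved frame pointer (hfp_save_offset), GPR save
   area (ending at reg_save_offset), SSE save area or ms2sysv stub area
   (sse_reg_save_offset), va_arg save area, local variables starting at
   frame_pointer_offset, outgoing arguments, and finally
   stack_pointer_offset at the end of the frame, possibly reduced by the
   red zone.  (Summary of the code above, not an ABI statement.)  */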
7276
7277/* This is semi-inlined memory_address_length, but simplified
7278 since we know that we're always dealing with reg+offset, and
7279 to avoid having to create and discard all that rtl. */
7280
7281static inline int
7282choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
7283{
7284 int len = 4;
7285
7286 if (offset == 0)
7287 {
7288 /* EBP and R13 cannot be encoded without an offset. */
7289 len = (regno == BP_REG || regno == R13_REG);
7290 }
7291 else if (IN_RANGE (offset, -128, 127))
7292 len = 1;
7293
7294 /* ESP and R12 must be encoded with a SIB byte. */
7295 if (regno == SP_REG || regno == R12_REG)
7296 len++;
7297
7298 return len;
7299}
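
/* Examples: (%ebp) cannot be encoded without a displacement, so BP_REG
   with a zero offset still costs 1 byte; (%esp) always needs a SIB byte,
   so SP_REG costs 1 byte at offset 0, 2 bytes with an 8-bit offset and
   5 bytes with a 32-bit offset.  */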
7300
7301/* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
7302 the frame save area. The register is saved at CFA - CFA_OFFSET. */
7303
7304static bool
7305sp_valid_at (HOST_WIDE_INT cfa_offset)
7306{
7307 const struct machine_frame_state &fs = cfun->machine->fs;
7308 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
7309 {
7310 /* Validate that the cfa_offset isn't in a "no-man's land". */
7311 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
7312 return false;
7313 }
7314 return fs.sp_valid;
7315}
7316
7317/* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
7318 the frame save area. The register is saved at CFA - CFA_OFFSET. */
7319
7320static inline bool
7321fp_valid_at (HOST_WIDE_INT cfa_offset)
7322{
7323 const struct machine_frame_state &fs = cfun->machine->fs;
7324 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
7325 {
7326 /* Validate that the cfa_offset isn't in a "no-man's land". */
7327 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
7328 return false;
7329 }
7330 return fs.fp_valid;
7331}
7332
7333/* Choose a base register based upon alignment requested, speed and/or
7334 size. */
7335
7336static void
7337choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
7338 HOST_WIDE_INT &base_offset,
7339 unsigned int align_reqested, unsigned int *align)
7340{
7341 const struct machine_function *m = cfun->machine;
7342 unsigned int hfp_align;
7343 unsigned int drap_align;
7344 unsigned int sp_align;
7345 bool hfp_ok = fp_valid_at (cfa_offset);
7346 bool drap_ok = m->fs.drap_valid;
7347 bool sp_ok = sp_valid_at (cfa_offset);
7348
7349 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
7350
7351 /* Filter out any registers that don't meet the requested alignment
7352 criteria. */
7353 if (align_reqested)
7354 {
7355 if (m->fs.realigned)
7356 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
	 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
7358 notes (which we would need to use a realigned stack pointer),
7359 so disable on SEH targets. */
7360 else if (m->fs.sp_realigned)
7361 sp_align = crtl->stack_alignment_needed;
7362
7363 hfp_ok = hfp_ok && hfp_align >= align_reqested;
7364 drap_ok = drap_ok && drap_align >= align_reqested;
7365 sp_ok = sp_ok && sp_align >= align_reqested;
7366 }
7367
7368 if (m->use_fast_prologue_epilogue)
7369 {
7370 /* Choose the base register most likely to allow the most scheduling
7371 opportunities. Generally FP is valid throughout the function,
7372 while DRAP must be reloaded within the epilogue. But choose either
7373 over the SP due to increased encoding size. */
7374
7375 if (hfp_ok)
7376 {
7377 base_reg = hard_frame_pointer_rtx;
7378 base_offset = m->fs.fp_offset - cfa_offset;
7379 }
7380 else if (drap_ok)
7381 {
7382 base_reg = crtl->drap_reg;
7383 base_offset = 0 - cfa_offset;
7384 }
7385 else if (sp_ok)
7386 {
7387 base_reg = stack_pointer_rtx;
7388 base_offset = m->fs.sp_offset - cfa_offset;
7389 }
7390 }
7391 else
7392 {
7393 HOST_WIDE_INT toffset;
7394 int len = 16, tlen;
7395
7396 /* Choose the base register with the smallest address encoding.
7397 With a tie, choose FP > DRAP > SP. */
7398 if (sp_ok)
7399 {
7400 base_reg = stack_pointer_rtx;
7401 base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
7403 }
7404 if (drap_ok)
7405 {
7406 toffset = 0 - cfa_offset;
7407 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), offset: toffset);
7408 if (tlen <= len)
7409 {
7410 base_reg = crtl->drap_reg;
7411 base_offset = toffset;
7412 len = tlen;
7413 }
7414 }
7415 if (hfp_ok)
7416 {
7417 toffset = m->fs.fp_offset - cfa_offset;
7418 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, offset: toffset);
7419 if (tlen <= len)
7420 {
7421 base_reg = hard_frame_pointer_rtx;
7422 base_offset = toffset;
7423 }
7424 }
7425 }
7426
7427 /* Set the align return value. */
7428 if (align)
7429 {
7430 if (base_reg == stack_pointer_rtx)
7431 *align = sp_align;
7432 else if (base_reg == crtl->drap_reg)
7433 *align = drap_align;
7434 else if (base_reg == hard_frame_pointer_rtx)
7435 *align = hfp_align;
7436 }
7437}
7438
7439/* Return an RTX that points to CFA_OFFSET within the stack frame and
7440 the alignment of address. If ALIGN is non-null, it should point to
7441 an alignment value (in bits) that is preferred or zero and will
7442   receive the alignment of the base register that was selected,
7443   irrespective of whether or not CFA_OFFSET is a multiple of that
7444 alignment value. If it is possible for the base register offset to be
7445 non-immediate then SCRATCH_REGNO should specify a scratch register to
7446 use.
7447
7448 The valid base registers are taken from CFUN->MACHINE->FS. */
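
/* For example (an illustrative sketch; exact modes depend on Pmode), with
   the stack pointer chosen as base and a small offset this returns

	(plus:DI (reg:DI sp) (const_int 32))

   whereas an offset that does not fit x86_64_immediate_operand is first
   loaded into SCRATCH_REGNO, giving

	(plus:DI (reg:DI sp) (reg:DI r11))

   with r11 standing in for whatever scratch register the caller passed.  */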
7449
7450static rtx
7451choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
7452 unsigned int scratch_regno = INVALID_REGNUM)
7453{
7454 rtx base_reg = NULL;
7455 HOST_WIDE_INT base_offset = 0;
7456
7457 /* If a specific alignment is requested, try to get a base register
7458 with that alignment first. */
7459 if (align && *align)
7460 choose_basereg (cfa_offset, base_reg, base_offset, align_reqested: *align, align);
7461
7462 if (!base_reg)
7463 choose_basereg (cfa_offset, base_reg, base_offset, align_reqested: 0, align);
7464
7465 gcc_assert (base_reg != NULL);
7466
7467 rtx base_offset_rtx = GEN_INT (base_offset);
7468
7469 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
7470 {
7471 gcc_assert (scratch_regno != INVALID_REGNUM);
7472
7473 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
7474 emit_move_insn (scratch_reg, base_offset_rtx);
7475
7476 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
7477 }
7478
7479 return plus_constant (Pmode, base_reg, base_offset);
7480}
7481
7482/* Emit code to save registers in the prologue. */
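
/* As an illustration (a sketch, not exact output): a 64-bit function that
   saves %rbx, %r12 and %r13 normally gets

	push	%r13
	push	%r12
	push	%rbx

   while with APX PUSH2/POP2 enabled, a TYPE_NORMAL function and a 16-byte
   aligned stack, pairs of saves are combined into PUSH2 instructions and
   any remaining odd register still uses an ordinary PUSH.  */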
7483
7484static void
7485ix86_emit_save_regs (void)
7486{
7487 int regno;
7488 rtx_insn *insn;
7489 bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
7490
7491 if (!TARGET_APX_PUSH2POP2
7492 || !ix86_can_use_push2pop2 ()
7493 || cfun->machine->func_type != TYPE_NORMAL)
7494 {
7495 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7496 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
7497 {
7498 insn = emit_insn (gen_push (arg: gen_rtx_REG (word_mode, regno),
7499 ppx_p: use_ppx));
7500 RTX_FRAME_RELATED_P (insn) = 1;
7501 }
7502 }
7503 else
7504 {
7505 int regno_list[2];
7506 regno_list[0] = regno_list[1] = -1;
7507 int loaded_regnum = 0;
7508 bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
7509
7510 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7511 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
7512 {
7513 if (aligned)
7514 {
7515 regno_list[loaded_regnum++] = regno;
7516 if (loaded_regnum == 2)
7517 {
7518 gcc_assert (regno_list[0] != -1
7519 && regno_list[1] != -1
7520 && regno_list[0] != regno_list[1]);
7521 const int offset = UNITS_PER_WORD * 2;
7522 rtx mem = gen_rtx_MEM (TImode,
7523 gen_rtx_PRE_DEC (Pmode,
7524 stack_pointer_rtx));
7525 insn = emit_insn (gen_push2 (mem,
7526 reg1: gen_rtx_REG (word_mode,
7527 regno_list[0]),
7528 reg2: gen_rtx_REG (word_mode,
7529 regno_list[1]),
7530 ppx_p: use_ppx));
7531 RTX_FRAME_RELATED_P (insn) = 1;
7532 rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
7533
7534 for (int i = 0; i < 2; i++)
7535 {
7536 rtx dwarf_reg = gen_rtx_REG (word_mode,
7537 regno_list[i]);
7538 rtx sp_offset = plus_constant (Pmode,
7539 stack_pointer_rtx,
7540 + UNITS_PER_WORD
7541 * (1 - i));
7542 rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
7543 sp_offset),
7544 dwarf_reg);
7545 RTX_FRAME_RELATED_P (tmp) = 1;
7546 XVECEXP (dwarf, 0, i + 1) = tmp;
7547 }
7548 rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
7549 plus_constant (Pmode,
7550 stack_pointer_rtx,
7551 -offset));
7552 RTX_FRAME_RELATED_P (sp_tmp) = 1;
7553 XVECEXP (dwarf, 0, 0) = sp_tmp;
7554 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
7555
7556 loaded_regnum = 0;
7557 regno_list[0] = regno_list[1] = -1;
7558 }
7559 }
7560 else
7561 {
7562 insn = emit_insn (gen_push (arg: gen_rtx_REG (word_mode, regno),
7563 ppx_p: use_ppx));
7564 RTX_FRAME_RELATED_P (insn) = 1;
7565 aligned = true;
7566 }
7567 }
7568 if (loaded_regnum == 1)
7569 {
7570 insn = emit_insn (gen_push (arg: gen_rtx_REG (word_mode,
7571 regno_list[0]),
7572 ppx_p: use_ppx));
7573 RTX_FRAME_RELATED_P (insn) = 1;
7574 }
7575 }
7576}
7577
7578/* Emit a single register save at CFA - CFA_OFFSET. */
7579
7580static void
7581ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
7582 HOST_WIDE_INT cfa_offset)
7583{
7584 struct machine_function *m = cfun->machine;
7585 rtx reg = gen_rtx_REG (mode, regno);
7586 rtx mem, addr, base, insn;
7587 unsigned int align = GET_MODE_ALIGNMENT (mode);
7588
7589 addr = choose_baseaddr (cfa_offset, align: &align);
7590 mem = gen_frame_mem (mode, addr);
7591
7592  /* The location alignment depends upon the base register.  */
7593 align = MIN (GET_MODE_ALIGNMENT (mode), align);
7594 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
7595 set_mem_align (mem, align);
7596
7597 insn = emit_insn (gen_rtx_SET (mem, reg));
7598 RTX_FRAME_RELATED_P (insn) = 1;
7599
7600 base = addr;
7601 if (GET_CODE (base) == PLUS)
7602 base = XEXP (base, 0);
7603 gcc_checking_assert (REG_P (base));
7604
7605 /* When saving registers into a re-aligned local stack frame, avoid
7606 any tricky guessing by dwarf2out. */
7607 if (m->fs.realigned)
7608 {
7609 gcc_checking_assert (stack_realign_drap);
7610
7611 if (regno == REGNO (crtl->drap_reg))
7612 {
7613 /* A bit of a hack. We force the DRAP register to be saved in
7614 the re-aligned stack frame, which provides us with a copy
7615 of the CFA that will last past the prologue. Install it. */
7616 gcc_checking_assert (cfun->machine->fs.fp_valid);
7617 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7618 cfun->machine->fs.fp_offset - cfa_offset);
7619 mem = gen_rtx_MEM (mode, addr);
7620 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
7621 }
7622 else
7623 {
7624 /* The frame pointer is a stable reference within the
7625 aligned frame. Use it. */
7626 gcc_checking_assert (cfun->machine->fs.fp_valid);
7627 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7628 cfun->machine->fs.fp_offset - cfa_offset);
7629 mem = gen_rtx_MEM (mode, addr);
7630 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7631 }
7632 }
7633
7634 else if (base == stack_pointer_rtx && m->fs.sp_realigned
7635 && cfa_offset >= m->fs.sp_realigned_offset)
7636 {
7637 gcc_checking_assert (stack_realign_fp);
7638 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7639 }
7640
7641 /* The memory may not be relative to the current CFA register,
7642 which means that we may need to generate a new pattern for
7643 use by the unwind info. */
7644 else if (base != m->fs.cfa_reg)
7645 {
7646 addr = plus_constant (Pmode, m->fs.cfa_reg,
7647 m->fs.cfa_offset - cfa_offset);
7648 mem = gen_rtx_MEM (mode, addr);
7649 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7650 }
7651}
7652
7653/* Emit code to save registers using MOV insns.
7654 First register is stored at CFA - CFA_OFFSET. */
7655static void
7656ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
7657{
7658 unsigned int regno;
7659
7660 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7661 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
7662 {
7663 ix86_emit_save_reg_using_mov (mode: word_mode, regno, cfa_offset);
7664 cfa_offset -= UNITS_PER_WORD;
7665 }
7666}
7667
7668/* Emit code to save SSE registers using MOV insns.
7669 First register is stored at CFA - CFA_OFFSET. */
7670static void
7671ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
7672{
7673 unsigned int regno;
7674
7675 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7676 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
7677 {
7678 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
7679 cfa_offset -= GET_MODE_SIZE (V4SFmode);
7680 }
7681}
7682
7683static GTY(()) rtx queued_cfa_restores;
7684
7685/* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
7686 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
7687 Don't add the note if the previously saved value will be left untouched
7688   within the stack red-zone until return, as unwinders can find the same value
7689 in the register and on the stack. */
7690
7691static void
7692ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
7693{
7694 if (!crtl->shrink_wrapped
7695 && cfa_offset <= cfun->machine->fs.red_zone_offset)
7696 return;
7697
7698 if (insn)
7699 {
7700 add_reg_note (insn, REG_CFA_RESTORE, reg);
7701 RTX_FRAME_RELATED_P (insn) = 1;
7702 }
7703 else
7704 queued_cfa_restores
7705 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
7706}
7707
7708/* Add queued REG_CFA_RESTORE notes if any to INSN. */
7709
7710static void
7711ix86_add_queued_cfa_restore_notes (rtx insn)
7712{
7713 rtx last;
7714 if (!queued_cfa_restores)
7715 return;
7716 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
7717 ;
7718 XEXP (last, 1) = REG_NOTES (insn);
7719 REG_NOTES (insn) = queued_cfa_restores;
7720 queued_cfa_restores = NULL_RTX;
7721 RTX_FRAME_RELATED_P (insn) = 1;
7722}
7723
7724/* Expand prologue or epilogue stack adjustment.
7725   The pattern exists to put a dependency on all ebp-based memory accesses.
7726 STYLE should be negative if instructions should be marked as frame related,
7727 zero if %r11 register is live and cannot be freely used and positive
7728 otherwise. */
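
/* For instance (a sketch), a prologue allocation of 64 bytes is emitted
   roughly as

	(set (reg sp) (plus (reg sp) (const_int -64)))

   i.e. a "sub $64, %rsp", while an offset too large for a signed 32-bit
   immediate is first loaded into %r11 (or into the hard frame pointer when
   STYLE is zero) and then added to the stack pointer.  */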
7729
7730static rtx
7731pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
7732 int style, bool set_cfa)
7733{
7734 struct machine_function *m = cfun->machine;
7735 rtx addend = offset;
7736 rtx insn;
7737 bool add_frame_related_expr = false;
7738
7739 if (!x86_64_immediate_operand (offset, Pmode))
7740 {
7741 /* r11 is used by indirect sibcall return as well, set before the
7742 epilogue and used after the epilogue. */
7743 if (style)
7744 addend = gen_rtx_REG (Pmode, R11_REG);
7745 else
7746 {
7747 gcc_assert (src != hard_frame_pointer_rtx
7748 && dest != hard_frame_pointer_rtx);
7749 addend = hard_frame_pointer_rtx;
7750 }
7751 emit_insn (gen_rtx_SET (addend, offset));
7752 if (style < 0)
7753 add_frame_related_expr = true;
7754 }
7755
7756 insn = emit_insn (gen_pro_epilogue_adjust_stack_add
7757 (Pmode, x0: dest, x1: src, x2: addend));
7758 if (style >= 0)
7759 ix86_add_queued_cfa_restore_notes (insn);
7760
7761 if (set_cfa)
7762 {
7763 rtx r;
7764
7765 gcc_assert (m->fs.cfa_reg == src);
7766 m->fs.cfa_offset += INTVAL (offset);
7767 m->fs.cfa_reg = dest;
7768
7769 r = gen_rtx_PLUS (Pmode, src, offset);
7770 r = gen_rtx_SET (dest, r);
7771 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
7772 RTX_FRAME_RELATED_P (insn) = 1;
7773 }
7774 else if (style < 0)
7775 {
7776 RTX_FRAME_RELATED_P (insn) = 1;
7777 if (add_frame_related_expr)
7778 {
7779 rtx r = gen_rtx_PLUS (Pmode, src, offset);
7780 r = gen_rtx_SET (dest, r);
7781 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
7782 }
7783 }
7784
7785 if (dest == stack_pointer_rtx)
7786 {
7787 HOST_WIDE_INT ooffset = m->fs.sp_offset;
7788 bool valid = m->fs.sp_valid;
7789 bool realigned = m->fs.sp_realigned;
7790
7791 if (src == hard_frame_pointer_rtx)
7792 {
7793 valid = m->fs.fp_valid;
7794 realigned = false;
7795 ooffset = m->fs.fp_offset;
7796 }
7797 else if (src == crtl->drap_reg)
7798 {
7799 valid = m->fs.drap_valid;
7800 realigned = false;
7801 ooffset = 0;
7802 }
7803 else
7804 {
7805 /* Else there are two possibilities: SP itself, which we set
7806 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
7807	     taken care of by hand along the eh_return path.  */
7808 gcc_checking_assert (src == stack_pointer_rtx
7809 || offset == const0_rtx);
7810 }
7811
7812 m->fs.sp_offset = ooffset - INTVAL (offset);
7813 m->fs.sp_valid = valid;
7814 m->fs.sp_realigned = realigned;
7815 }
7816 return insn;
7817}
7818
7819/* Find an available register to be used as dynamic realign argument
7820   pointer register.  Such a register will be written in the prologue and
7821   used at the beginning of the body, so it must not be
7822 1. parameter passing register.
7823 2. GOT pointer.
7824 We reuse static-chain register if it is available. Otherwise, we
7825 use DI for i386 and R13 for x86-64. We chose R13 since it has
7826 shorter encoding.
7827
7828 Return: the regno of chosen register. */
7829
7830static unsigned int
7831find_drap_reg (void)
7832{
7833 tree decl = cfun->decl;
7834
7835 /* Always use callee-saved register if there are no caller-saved
7836 registers. */
7837 if (TARGET_64BIT)
7838 {
7839      /* Use R13 for a nested function or a function that needs a static chain.
7840 Since function with tail call may use any caller-saved
7841 registers in epilogue, DRAP must not use caller-saved
7842 register in such case. */
7843 if (DECL_STATIC_CHAIN (decl)
7844 || (cfun->machine->call_saved_registers
7845 == TYPE_NO_CALLER_SAVED_REGISTERS)
7846 || crtl->tail_call_emit)
7847 return R13_REG;
7848
7849 return R10_REG;
7850 }
7851 else
7852 {
7853      /* Use DI for a nested function or a function that needs a static chain.
7854 Since function with tail call may use any caller-saved
7855 registers in epilogue, DRAP must not use caller-saved
7856 register in such case. */
7857 if (DECL_STATIC_CHAIN (decl)
7858 || (cfun->machine->call_saved_registers
7859 == TYPE_NO_CALLER_SAVED_REGISTERS)
7860 || crtl->tail_call_emit
7861 || crtl->calls_eh_return)
7862 return DI_REG;
7863
7864 /* Reuse static chain register if it isn't used for parameter
7865 passing. */
7866 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
7867 {
7868 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
7869 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
7870 return CX_REG;
7871 }
7872 return DI_REG;
7873 }
7874}
7875
7876/* Return minimum incoming stack alignment. */
7877
7878static unsigned int
7879ix86_minimum_incoming_stack_boundary (bool sibcall)
7880{
7881 unsigned int incoming_stack_boundary;
7882
7883 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
7884 if (cfun->machine->func_type != TYPE_NORMAL)
7885 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
7886 /* Prefer the one specified at command line. */
7887 else if (ix86_user_incoming_stack_boundary)
7888 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
7889  /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
7890     when -mstackrealign is used, this isn't a sibcall check, and the
7891     estimated stack alignment is 128 bits.  */
7892 else if (!sibcall
7893 && ix86_force_align_arg_pointer
7894 && crtl->stack_alignment_estimated == 128)
7895 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7896 else
7897 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
7898
7899 /* Incoming stack alignment can be changed on individual functions
7900 via force_align_arg_pointer attribute. We use the smallest
7901 incoming stack boundary. */
7902 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
7903 && lookup_attribute (attr_name: "force_align_arg_pointer",
7904 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7905 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7906
7907 /* The incoming stack frame has to be aligned at least at
7908 parm_stack_boundary. */
7909 if (incoming_stack_boundary < crtl->parm_stack_boundary)
7910 incoming_stack_boundary = crtl->parm_stack_boundary;
7911
7912 /* Stack at entrance of main is aligned by runtime. We use the
7913 smallest incoming stack boundary. */
7914 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
7915 && DECL_NAME (current_function_decl)
7916 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7917 && DECL_FILE_SCOPE_P (current_function_decl))
7918 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7919
7920 return incoming_stack_boundary;
7921}
7922
7923/* Update incoming stack boundary and estimated stack alignment. */
7924
7925static void
7926ix86_update_stack_boundary (void)
7927{
7928 ix86_incoming_stack_boundary
7929 = ix86_minimum_incoming_stack_boundary (sibcall: false);
7930
7931 /* x86_64 vararg needs 16byte stack alignment for register save area. */
7932 if (TARGET_64BIT
7933 && cfun->stdarg
7934 && crtl->stack_alignment_estimated < 128)
7935 crtl->stack_alignment_estimated = 128;
7936
7937 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
7938 if (ix86_tls_descriptor_calls_expanded_in_cfun
7939 && crtl->preferred_stack_boundary < 128)
7940 crtl->preferred_stack_boundary = 128;
7941
7942 /* For 32-bit MS ABI, both the incoming and preferred stack boundaries
7943 are 32 bits, but if force_align_arg_pointer is specified, it should
7944 prefer 128 bits for a backward-compatibility reason, which is also
7945 what the doc suggests. */
7946 if (lookup_attribute (attr_name: "force_align_arg_pointer",
7947 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))
7948 && crtl->preferred_stack_boundary < 128)
7949 crtl->preferred_stack_boundary = 128;
7950}
7951
7952/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7953 needed or an rtx for DRAP otherwise. */
7954
7955static rtx
7956ix86_get_drap_rtx (void)
7957{
7958 /* We must use DRAP if there are outgoing arguments on stack or
7959     the stack pointer register is clobbered by an asm statement and
7960 ACCUMULATE_OUTGOING_ARGS is false. */
7961 if (ix86_force_drap
7962 || ((cfun->machine->outgoing_args_on_stack
7963 || crtl->sp_is_clobbered_by_asm)
7964 && !ACCUMULATE_OUTGOING_ARGS))
7965 crtl->need_drap = true;
7966
7967 if (stack_realign_drap)
7968 {
7969      /* Assign DRAP to vDRAP and return vDRAP.  */
7970 unsigned int regno = find_drap_reg ();
7971 rtx drap_vreg;
7972 rtx arg_ptr;
7973 rtx_insn *seq, *insn;
7974
7975 arg_ptr = gen_rtx_REG (Pmode, regno);
7976 crtl->drap_reg = arg_ptr;
7977
7978 start_sequence ();
7979 drap_vreg = copy_to_reg (arg_ptr);
7980 seq = end_sequence ();
7981
7982 insn = emit_insn_before (seq, NEXT_INSN (insn: entry_of_function ()));
7983 if (!optimize)
7984 {
7985 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
7986 RTX_FRAME_RELATED_P (insn) = 1;
7987 }
7988 return drap_vreg;
7989 }
7990 else
7991 return NULL;
7992}
7993
7994/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7995
7996static rtx
7997ix86_internal_arg_pointer (void)
7998{
7999 return virtual_incoming_args_rtx;
8000}
8001
8002struct scratch_reg {
8003 rtx reg;
8004 bool saved;
8005};
8006
8007/* Return a short-lived scratch register for use on function entry.
8008 In 32-bit mode, it is valid only after the registers are saved
8009 in the prologue. This register must be released by means of
8010 release_scratch_register_on_entry once it is dead. */
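
/* Typical usage in this file (a sketch of how the two helpers pair up,
   not a new API):

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit probing insns that use sr.reg ...
     release_scratch_register_on_entry (&sr, size, release_via_pop);
 */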
8011
8012static void
8013get_scratch_register_on_entry (struct scratch_reg *sr)
8014{
8015 int regno;
8016
8017 sr->saved = false;
8018
8019 if (TARGET_64BIT)
8020 {
8021 /* We always use R11 in 64-bit mode. */
8022 regno = R11_REG;
8023 }
8024 else
8025 {
8026 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
8027 bool fastcall_p
8028 = lookup_attribute (attr_name: "fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
8029 bool thiscall_p
8030 = lookup_attribute (attr_name: "thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
8031 bool static_chain_p = DECL_STATIC_CHAIN (decl);
8032 int regparm = ix86_function_regparm (type: fntype, decl);
8033 int drap_regno
8034 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
8035
8036 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
8037 for the static chain register. */
8038 if ((regparm < 1 || (fastcall_p && !static_chain_p))
8039 && drap_regno != AX_REG)
8040 regno = AX_REG;
8041 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
8042 for the static chain register. */
8043 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
8044 regno = AX_REG;
8045 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
8046 regno = DX_REG;
8047 /* ecx is the static chain register. */
8048 else if (regparm < 3 && !fastcall_p && !thiscall_p
8049 && !static_chain_p
8050 && drap_regno != CX_REG)
8051 regno = CX_REG;
8052 else if (ix86_save_reg (BX_REG, maybe_eh_return: true, ignore_outlined: false))
8053 regno = BX_REG;
8054 /* esi is the static chain register. */
8055 else if (!(regparm == 3 && static_chain_p)
8056 && ix86_save_reg (SI_REG, maybe_eh_return: true, ignore_outlined: false))
8057 regno = SI_REG;
8058 else if (ix86_save_reg (DI_REG, maybe_eh_return: true, ignore_outlined: false))
8059 regno = DI_REG;
8060 else
8061 {
8062 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
8063 sr->saved = true;
8064 }
8065 }
8066
8067 sr->reg = gen_rtx_REG (Pmode, regno);
8068 if (sr->saved)
8069 {
8070 rtx_insn *insn = emit_insn (gen_push (arg: sr->reg));
8071 RTX_FRAME_RELATED_P (insn) = 1;
8072 }
8073}
8074
8075/* Release a scratch register obtained from the preceding function.
8076
8077 If RELEASE_VIA_POP is true, we just pop the register off the stack
8078 to release it. This is what non-Linux systems use with -fstack-check.
8079
8080 Otherwise we use OFFSET to locate the saved register and the
8081 allocated stack space becomes part of the local frame and is
8082 deallocated by the epilogue. */
8083
8084static void
8085release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
8086 bool release_via_pop)
8087{
8088 if (sr->saved)
8089 {
8090 if (release_via_pop)
8091 {
8092 struct machine_function *m = cfun->machine;
8093 rtx x, insn = emit_insn (gen_pop (arg: sr->reg));
8094
8095	  /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
8096 RTX_FRAME_RELATED_P (insn) = 1;
8097 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8098 x = gen_rtx_SET (stack_pointer_rtx, x);
8099 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
8100 m->fs.sp_offset -= UNITS_PER_WORD;
8101 }
8102 else
8103 {
8104 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
8105 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
8106 emit_insn (x);
8107 }
8108 }
8109}
8110
8111/* Emit code to adjust the stack pointer by SIZE bytes while probing it.
8112
8113 If INT_REGISTERS_SAVED is true, then integer registers have already been
8114 pushed on the stack.
8115
8116   If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
8117 beyond SIZE bytes.
8118
8119 This assumes no knowledge of the current probing state, i.e. it is never
8120 allowed to allocate more than PROBE_INTERVAL bytes of stack space without
8121 a suitable probe. */
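
/* For a large frame the emitted sequence is roughly (a sketch assuming a
   4 KiB probe interval and %r11 as the scratch register):

	lea	-ROUNDED_SIZE(%rsp), %r11
   .LPSRL0:
	sub	$4096, %rsp
	or	$0, (%rsp)
	cmp	%r11, %rsp
	jne	.LPSRL0
	sub	$RESIDUAL, %rsp		(only if SIZE != ROUNDED_SIZE)
 */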
8122
8123static void
8124ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
8125 const bool int_registers_saved,
8126 const bool protection_area)
8127{
8128 struct machine_function *m = cfun->machine;
8129
8130 /* If this function does not statically allocate stack space, then
8131 no probes are needed. */
8132 if (!size)
8133 {
8134 /* However, the allocation of space via pushes for register
8135 saves could be viewed as allocating space, but without the
8136 need to probe. */
8137 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
8138 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
8139 else
8140 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
8141 return;
8142 }
8143
8144 /* If we are a noreturn function, then we have to consider the
8145 possibility that we're called via a jump rather than a call.
8146
8147 Thus we don't have the implicit probe generated by saving the
8148 return address into the stack at the call. Thus, the stack
8149 pointer could be anywhere in the guard page. The safe thing
8150 to do is emit a probe now.
8151
8152 The probe can be avoided if we have already emitted any callee
8153 register saves into the stack or have a frame pointer (which will
8154 have been saved as well). Those saves will function as implicit
8155 probes.
8156
8157 ?!? This should be revamped to work like aarch64 and s390 where
8158 we track the offset from the most recent probe. Normally that
8159 offset would be zero. For a noreturn function we would reset
8160 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
8161 we just probe when we cross PROBE_INTERVAL. */
8162 if (TREE_THIS_VOLATILE (cfun->decl)
8163 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
8164 {
8165 /* We can safely use any register here since we're just going to push
8166 its value and immediately pop it back. But we do try and avoid
8167 argument passing registers so as not to introduce dependencies in
8168 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
8169 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
8170 rtx_insn *insn_push = emit_insn (gen_push (arg: dummy_reg));
8171 rtx_insn *insn_pop = emit_insn (gen_pop (arg: dummy_reg));
8172 m->fs.sp_offset -= UNITS_PER_WORD;
8173 if (m->fs.cfa_reg == stack_pointer_rtx)
8174 {
8175 m->fs.cfa_offset -= UNITS_PER_WORD;
8176 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8177 x = gen_rtx_SET (stack_pointer_rtx, x);
8178 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
8179 RTX_FRAME_RELATED_P (insn_push) = 1;
8180 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8181 x = gen_rtx_SET (stack_pointer_rtx, x);
8182 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
8183 RTX_FRAME_RELATED_P (insn_pop) = 1;
8184 }
8185 emit_insn (gen_blockage ());
8186 }
8187
8188 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8189 const int dope = 4 * UNITS_PER_WORD;
8190
8191 /* If there is protection area, take it into account in the size. */
8192 if (protection_area)
8193 size += probe_interval + dope;
8194
8195 /* If we allocate less than the size of the guard statically,
8196 then no probing is necessary, but we do need to allocate
8197 the stack. */
8198 else if (size < (1 << param_stack_clash_protection_guard_size))
8199 {
8200 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8201 GEN_INT (-size), style: -1,
8202 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
8203 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
8204 return;
8205 }
8206
8207 /* We're allocating a large enough stack frame that we need to
8208 emit probes. Either emit them inline or in a loop depending
8209 on the size. */
8210 if (size <= 4 * probe_interval)
8211 {
8212 HOST_WIDE_INT i;
8213 for (i = probe_interval; i <= size; i += probe_interval)
8214 {
8215 /* Allocate PROBE_INTERVAL bytes. */
8216 rtx insn
8217 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8218 GEN_INT (-probe_interval), style: -1,
8219 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
8220 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
8221
8222 /* And probe at *sp. */
8223 emit_stack_probe (stack_pointer_rtx);
8224 emit_insn (gen_blockage ());
8225 }
8226
8227 /* We need to allocate space for the residual, but we do not need
8228 to probe the residual... */
8229 HOST_WIDE_INT residual = (i - probe_interval - size);
8230 if (residual)
8231 {
8232 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8233 GEN_INT (residual), style: -1,
8234 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
8235
8236 /* ...except if there is a protection area to maintain. */
8237 if (protection_area)
8238 emit_stack_probe (stack_pointer_rtx);
8239 }
8240
8241 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
8242 }
8243 else
8244 {
8245 /* We expect the GP registers to be saved when probes are used
8246 as the probing sequences might need a scratch register and
8247 the routine to allocate one assumes the integer registers
8248 have already been saved. */
8249 gcc_assert (int_registers_saved);
8250
8251 struct scratch_reg sr;
8252 get_scratch_register_on_entry (sr: &sr);
8253
8254 /* If we needed to save a register, then account for any space
8255 that was pushed (we are not going to pop the register when
8256 we do the restore). */
8257 if (sr.saved)
8258 size -= UNITS_PER_WORD;
8259
8260 /* Step 1: round SIZE down to a multiple of the interval. */
8261 HOST_WIDE_INT rounded_size = size & -probe_interval;
8262
8263 /* Step 2: compute final value of the loop counter. Use lea if
8264 possible. */
8265 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
8266 rtx insn;
8267 if (address_no_seg_operand (addr, Pmode))
8268 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
8269 else
8270 {
8271 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
8272 insn = emit_insn (gen_rtx_SET (sr.reg,
8273 gen_rtx_PLUS (Pmode, sr.reg,
8274 stack_pointer_rtx)));
8275 }
8276 if (m->fs.cfa_reg == stack_pointer_rtx)
8277 {
8278 add_reg_note (insn, REG_CFA_DEF_CFA,
8279 plus_constant (Pmode, sr.reg,
8280 m->fs.cfa_offset + rounded_size));
8281 RTX_FRAME_RELATED_P (insn) = 1;
8282 }
8283
8284 /* Step 3: the loop. */
8285 rtx size_rtx = GEN_INT (rounded_size);
8286 insn = emit_insn (gen_adjust_stack_and_probe (Pmode, x0: sr.reg, x1: sr.reg,
8287 x2: size_rtx));
8288 if (m->fs.cfa_reg == stack_pointer_rtx)
8289 {
8290 m->fs.cfa_offset += rounded_size;
8291 add_reg_note (insn, REG_CFA_DEF_CFA,
8292 plus_constant (Pmode, stack_pointer_rtx,
8293 m->fs.cfa_offset));
8294 RTX_FRAME_RELATED_P (insn) = 1;
8295 }
8296 m->fs.sp_offset += rounded_size;
8297 emit_insn (gen_blockage ());
8298
8299 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
8300 is equal to ROUNDED_SIZE. */
8301
8302 if (size != rounded_size)
8303 {
8304 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8305 GEN_INT (rounded_size - size), style: -1,
8306 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
8307
8308 if (protection_area)
8309 emit_stack_probe (stack_pointer_rtx);
8310 }
8311
8312 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
8313
8314 /* This does not deallocate the space reserved for the scratch
8315 register. That will be deallocated in the epilogue. */
8316 release_scratch_register_on_entry (sr: &sr, offset: size, release_via_pop: false);
8317 }
8318
8319 /* Adjust back to account for the protection area. */
8320 if (protection_area)
8321 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8322 GEN_INT (probe_interval + dope), style: -1,
8323 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
8324
8325 /* Make sure nothing is scheduled before we are done. */
8326 emit_insn (gen_blockage ());
8327}
8328
8329/* Adjust the stack pointer up to REG while probing it. */
8330
8331const char *
8332output_adjust_stack_and_probe (rtx reg)
8333{
8334 static int labelno = 0;
8335 char loop_lab[32];
8336 rtx xops[2];
8337
8338 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8339
8340 /* Loop. */
8341 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8342
8343 /* SP = SP + PROBE_INTERVAL. */
8344 xops[0] = stack_pointer_rtx;
8345 xops[1] = GEN_INT (get_probe_interval ());
8346 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8347
8348 /* Probe at SP. */
8349 xops[1] = const0_rtx;
8350 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
8351
8352 /* Test if SP == LAST_ADDR. */
8353 xops[0] = stack_pointer_rtx;
8354 xops[1] = reg;
8355 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8356
8357 /* Branch. */
8358 fputs (s: "\tjne\t", stream: asm_out_file);
8359 assemble_name_raw (asm_out_file, loop_lab);
8360 fputc (c: '\n', stream: asm_out_file);
8361
8362 return "";
8363}
8364
8365/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
8366 inclusive. These are offsets from the current stack pointer.
8367
8368 INT_REGISTERS_SAVED is true if integer registers have already been
8369 pushed on the stack. */
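
/* In the constant, small-size case the probes are emitted inline.  For
   example (a sketch), FIRST == 0, SIZE == 10000 and a 4 KiB probe interval
   give probes at roughly

	or	$0, -4096(%rsp)
	or	$0, -8192(%rsp)
	or	$0, -10000(%rsp)
 */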
8370
8371static void
8372ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
8373 const bool int_registers_saved)
8374{
8375 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8376
8377 /* See if we have a constant small number of probes to generate. If so,
8378 that's the easy case. The run-time loop is made up of 6 insns in the
8379 generic case while the compile-time loop is made up of n insns for n #
8380 of intervals. */
8381 if (size <= 6 * probe_interval)
8382 {
8383 HOST_WIDE_INT i;
8384
8385 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
8386 it exceeds SIZE. If only one probe is needed, this will not
8387 generate any code. Then probe at FIRST + SIZE. */
8388 for (i = probe_interval; i < size; i += probe_interval)
8389 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8390 -(first + i)));
8391
8392 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8393 -(first + size)));
8394 }
8395
8396 /* Otherwise, do the same as above, but in a loop. Note that we must be
8397 extra careful with variables wrapping around because we might be at
8398 the very top (or the very bottom) of the address space and we have
8399 to be able to handle this case properly; in particular, we use an
8400 equality test for the loop condition. */
8401 else
8402 {
8403 /* We expect the GP registers to be saved when probes are used
8404 as the probing sequences might need a scratch register and
8405 the routine to allocate one assumes the integer registers
8406 have already been saved. */
8407 gcc_assert (int_registers_saved);
8408
8409 HOST_WIDE_INT rounded_size, last;
8410 struct scratch_reg sr;
8411
8412 get_scratch_register_on_entry (sr: &sr);
8413
8414
8415 /* Step 1: round SIZE to the previous multiple of the interval. */
8416
8417 rounded_size = ROUND_DOWN (size, probe_interval);
8418
8419
8420 /* Step 2: compute initial and final value of the loop counter. */
8421
8422 /* TEST_OFFSET = FIRST. */
8423 emit_move_insn (sr.reg, GEN_INT (-first));
8424
8425 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
8426 last = first + rounded_size;
8427
8428
8429 /* Step 3: the loop
8430
8431 do
8432 {
8433 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
8434 probe at TEST_ADDR
8435 }
8436 while (TEST_ADDR != LAST_ADDR)
8437
8438 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
8439 until it is equal to ROUNDED_SIZE. */
8440
8441 emit_insn
8442 (gen_probe_stack_range (Pmode, x0: sr.reg, x1: sr.reg, GEN_INT (-last)));
8443
8444
8445 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
8446 that SIZE is equal to ROUNDED_SIZE. */
8447
8448 if (size != rounded_size)
8449 emit_stack_probe (plus_constant (Pmode,
8450 gen_rtx_PLUS (Pmode,
8451 stack_pointer_rtx,
8452 sr.reg),
8453 rounded_size - size));
8454
8455 release_scratch_register_on_entry (sr: &sr, offset: size, release_via_pop: true);
8456 }
8457
8458 /* Make sure nothing is scheduled before we are done. */
8459 emit_insn (gen_blockage ());
8460}
8461
8462/* Probe a range of stack addresses from REG to END, inclusive. These are
8463 offsets from the current stack pointer. */
8464
8465const char *
8466output_probe_stack_range (rtx reg, rtx end)
8467{
8468 static int labelno = 0;
8469 char loop_lab[32];
8470 rtx xops[3];
8471
8472 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8473
8474 /* Loop. */
8475 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8476
8477 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
8478 xops[0] = reg;
8479 xops[1] = GEN_INT (get_probe_interval ());
8480 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8481
8482 /* Probe at TEST_ADDR. */
8483 xops[0] = stack_pointer_rtx;
8484 xops[1] = reg;
8485 xops[2] = const0_rtx;
8486 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
8487
8488 /* Test if TEST_ADDR == LAST_ADDR. */
8489 xops[0] = reg;
8490 xops[1] = end;
8491 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8492
8493 /* Branch. */
8494 fputs (s: "\tjne\t", stream: asm_out_file);
8495 assemble_name_raw (asm_out_file, loop_lab);
8496 fputc (c: '\n', stream: asm_out_file);
8497
8498 return "";
8499}
8500
8501/* Data passed to ix86_update_stack_alignment. */
8502struct stack_access_data
8503{
8504 /* The stack access register. */
8505 const_rtx reg;
8506 /* Pointer to stack alignment. */
8507 unsigned int *stack_alignment;
8508};
8509
8510/* Update the maximum stack slot alignment from memory alignment in PAT. */
8511
8512static void
8513ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
8514{
8515 /* This insn may reference stack slot. Update the maximum stack slot
8516 alignment if the memory is referenced by the stack access register. */
8517 stack_access_data *p = (stack_access_data *) data;
8518
8519 subrtx_iterator::array_type array;
8520 FOR_EACH_SUBRTX (iter, array, pat, ALL)
8521 {
8522 auto op = *iter;
8523 if (MEM_P (op))
8524 {
8525 if (reg_mentioned_p (p->reg, XEXP (op, 0)))
8526 {
8527 unsigned int alignment = MEM_ALIGN (op);
8528
8529 if (alignment > *p->stack_alignment)
8530 *p->stack_alignment = alignment;
8531 break;
8532 }
8533 else
8534 iter.skip_subrtxes ();
8535 }
8536 }
8537}
8538
8539/* Helper function for ix86_find_all_reg_uses. */
8540
8541static void
8542ix86_find_all_reg_uses_1 (HARD_REG_SET &regset,
8543 rtx set, unsigned int regno,
8544 auto_bitmap &worklist)
8545{
8546 rtx dest = SET_DEST (set);
8547
8548 if (!REG_P (dest))
8549 return;
8550
8551 /* Reject non-Pmode modes. */
8552 if (GET_MODE (dest) != Pmode)
8553 return;
8554
8555 unsigned int dst_regno = REGNO (dest);
8556
8557 if (TEST_HARD_REG_BIT (set: regset, bit: dst_regno))
8558 return;
8559
8560 const_rtx src = SET_SRC (set);
8561
8562 subrtx_iterator::array_type array;
8563 FOR_EACH_SUBRTX (iter, array, src, ALL)
8564 {
8565 auto op = *iter;
8566
8567 if (MEM_P (op))
8568 iter.skip_subrtxes ();
8569
8570 if (REG_P (op) && REGNO (op) == regno)
8571 {
8572 /* Add this register to register set. */
8573 add_to_hard_reg_set (regs: &regset, Pmode, regno: dst_regno);
8574 bitmap_set_bit (worklist, dst_regno);
8575 break;
8576 }
8577 }
8578}
8579
8580/* Find all registers defined with register REGNO. */
8581
8582static void
8583ix86_find_all_reg_uses (HARD_REG_SET &regset,
8584 unsigned int regno, auto_bitmap &worklist)
8585{
8586 for (df_ref ref = DF_REG_USE_CHAIN (regno);
8587 ref != NULL;
8588 ref = DF_REF_NEXT_REG (ref))
8589 {
8590 if (DF_REF_IS_ARTIFICIAL (ref))
8591 continue;
8592
8593 rtx_insn *insn = DF_REF_INSN (ref);
8594
8595 if (!NONJUMP_INSN_P (insn))
8596 continue;
8597
8598 unsigned int ref_regno = DF_REF_REGNO (ref);
8599
8600 rtx set = single_set (insn);
8601 if (set)
8602 {
8603 ix86_find_all_reg_uses_1 (regset, set,
8604 regno: ref_regno, worklist);
8605 continue;
8606 }
8607
8608 rtx pat = PATTERN (insn);
8609 if (GET_CODE (pat) != PARALLEL)
8610 continue;
8611
8612 for (int i = 0; i < XVECLEN (pat, 0); i++)
8613 {
8614 rtx exp = XVECEXP (pat, 0, i);
8615
8616 if (GET_CODE (exp) == SET)
8617 ix86_find_all_reg_uses_1 (regset, set: exp,
8618 regno: ref_regno, worklist);
8619 }
8620 }
8621}
8622
8623/* Set stack_frame_required to false if stack frame isn't required.
8624 Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
8625 slot used if stack frame is required and CHECK_STACK_SLOT is true. */
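
/* For example (illustrative only), given insns like

	%rcx = %rsp + 64
	movaps	%xmm0, (%rcx)

   the MEM through %rcx counts as a stack access because %rcx is
   (transitively) defined from the stack pointer, so its MEM_ALIGN is
   folded into STACK_ALIGNMENT by the worklist scan below.  */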
8626
8627static void
8628ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
8629 bool check_stack_slot)
8630{
8631 HARD_REG_SET set_up_by_prologue, prologue_used;
8632 basic_block bb;
8633
8634 CLEAR_HARD_REG_SET (set&: prologue_used);
8635 CLEAR_HARD_REG_SET (set&: set_up_by_prologue);
8636 add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
8637 add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
8638 add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode,
8639 HARD_FRAME_POINTER_REGNUM);
8640
8641 bool require_stack_frame = false;
8642
8643 FOR_EACH_BB_FN (bb, cfun)
8644 {
8645 rtx_insn *insn;
8646 FOR_BB_INSNS (bb, insn)
8647 if (NONDEBUG_INSN_P (insn)
8648 && requires_stack_frame_p (insn, prologue_used,
8649 set_up_by_prologue))
8650 {
8651 require_stack_frame = true;
8652 break;
8653 }
8654 }
8655
8656 cfun->machine->stack_frame_required = require_stack_frame;
8657
8658 /* Stop if we don't need to check stack slot. */
8659 if (!check_stack_slot)
8660 return;
8661
8662 /* The preferred stack alignment is the minimum stack alignment. */
8663 if (stack_alignment > crtl->preferred_stack_boundary)
8664 stack_alignment = crtl->preferred_stack_boundary;
8665
8666 HARD_REG_SET stack_slot_access;
8667 CLEAR_HARD_REG_SET (set&: stack_slot_access);
8668
8669  /* A stack slot can be accessed via the stack pointer, the frame pointer,
8670     or registers defined from the stack pointer or frame pointer.  */
8671 auto_bitmap worklist;
8672
8673 add_to_hard_reg_set (regs: &stack_slot_access, Pmode, STACK_POINTER_REGNUM);
8674 bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
8675
8676 if (frame_pointer_needed)
8677 {
8678 add_to_hard_reg_set (regs: &stack_slot_access, Pmode,
8679 HARD_FRAME_POINTER_REGNUM);
8680 bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
8681 }
8682
8683 unsigned int regno;
8684
8685 do
8686 {
8687 regno = bitmap_clear_first_set_bit (worklist);
8688 ix86_find_all_reg_uses (regset&: stack_slot_access, regno, worklist);
8689 }
8690 while (!bitmap_empty_p (map: worklist));
8691
8692 hard_reg_set_iterator hrsi;
8693 stack_access_data data;
8694
8695 data.stack_alignment = &stack_alignment;
8696
8697 EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi)
8698 for (df_ref ref = DF_REG_USE_CHAIN (regno);
8699 ref != NULL;
8700 ref = DF_REF_NEXT_REG (ref))
8701 {
8702 if (DF_REF_IS_ARTIFICIAL (ref))
8703 continue;
8704
8705 rtx_insn *insn = DF_REF_INSN (ref);
8706
8707 if (!NONJUMP_INSN_P (insn))
8708 continue;
8709
8710 data.reg = DF_REF_REG (ref);
8711 note_stores (insn, ix86_update_stack_alignment, &data);
8712 }
8713}
8714
8715/* Finalize stack_realign_needed and frame_pointer_needed flags, which
8716 will guide prologue/epilogue to be generated in correct form. */
8717
8718static void
8719ix86_finalize_stack_frame_flags (void)
8720{
8721  /* Check if stack realignment is really needed after reload, and
8722     store the result in cfun.  */
8723 unsigned int incoming_stack_boundary
8724 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8725 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8726 unsigned int stack_alignment
8727 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
8728 ? crtl->max_used_stack_slot_alignment
8729 : crtl->stack_alignment_needed);
8730 unsigned int stack_realign
8731 = (incoming_stack_boundary < stack_alignment);
8732 bool recompute_frame_layout_p = false;
8733
8734 if (crtl->stack_realign_finalized)
8735 {
8736      /* After stack_realign_needed is finalized, we can no longer
8737 change it. */
8738 gcc_assert (crtl->stack_realign_needed == stack_realign);
8739 return;
8740 }
8741
8742 /* It is always safe to compute max_used_stack_alignment. We
8743     compute it only if a 128-bit aligned load/store may be generated
8744     on a misaligned stack slot, which would lead to a segfault.  */
8745 bool check_stack_slot
8746 = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
8747 ix86_find_max_used_stack_alignment (stack_alignment,
8748 check_stack_slot);
8749
8750 /* If the only reason for frame_pointer_needed is that we conservatively
8751 assumed stack realignment might be needed or -fno-omit-frame-pointer
8752 is used, but in the end nothing that needed the stack alignment had
8753     been spilled and the stack was not accessed, clear frame_pointer_needed and say we
8754 don't need stack realignment.
8755
8756 When vector register is used for piecewise move and store, we don't
8757 increase stack_alignment_needed as there is no register spill for
8758 piecewise move and store. Since stack_realign_needed is set to true
8759 by checking stack_alignment_estimated which is updated by pseudo
8760 vector register usage, we also need to check stack_realign_needed to
8761 eliminate frame pointer. */
8762 if ((stack_realign
8763 || (!flag_omit_frame_pointer && optimize)
8764 || crtl->stack_realign_needed)
8765 && frame_pointer_needed
8766 && crtl->is_leaf
8767 && crtl->sp_is_unchanging
8768 && !ix86_current_function_calls_tls_descriptor
8769 && !crtl->accesses_prior_frames
8770 && !cfun->calls_alloca
8771 && !crtl->calls_eh_return
8772 /* See ira_setup_eliminable_regset for the rationale. */
8773 && !(STACK_CHECK_MOVING_SP
8774 && flag_stack_check
8775 && flag_exceptions
8776 && cfun->can_throw_non_call_exceptions)
8777 && !ix86_frame_pointer_required ()
8778 && ix86_get_frame_size () == 0
8779 && ix86_nsaved_sseregs () == 0
8780 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
8781 {
8782 if (cfun->machine->stack_frame_required)
8783 {
8784 /* Stack frame is required. If stack alignment needed is less
8785 than incoming stack boundary, don't realign stack. */
8786 stack_realign = incoming_stack_boundary < stack_alignment;
8787 if (!stack_realign)
8788 {
8789 crtl->max_used_stack_slot_alignment
8790 = incoming_stack_boundary;
8791 crtl->stack_alignment_needed
8792 = incoming_stack_boundary;
8793 /* Also update preferred_stack_boundary for leaf
8794 functions. */
8795 crtl->preferred_stack_boundary
8796 = incoming_stack_boundary;
8797 }
8798 }
8799 else
8800 {
8801 /* If drap has been set, but it actually isn't live at the
8802 start of the function, there is no reason to set it up. */
8803 if (crtl->drap_reg)
8804 {
8805 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8806 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
8807 REGNO (crtl->drap_reg)))
8808 {
8809 crtl->drap_reg = NULL_RTX;
8810 crtl->need_drap = false;
8811 }
8812 }
8813 else
8814 cfun->machine->no_drap_save_restore = true;
8815
8816 frame_pointer_needed = false;
8817 stack_realign = false;
8818 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
8819 crtl->stack_alignment_needed = incoming_stack_boundary;
8820 crtl->stack_alignment_estimated = incoming_stack_boundary;
8821 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
8822 crtl->preferred_stack_boundary = incoming_stack_boundary;
8823 df_finish_pass (true);
8824 df_scan_alloc (NULL);
8825 df_scan_blocks ();
8826 df_compute_regs_ever_live (true);
8827 df_analyze ();
8828
8829 if (flag_var_tracking)
8830 {
8831 /* Since frame pointer is no longer available, replace it with
8832 stack pointer - UNITS_PER_WORD in debug insns. */
8833 df_ref ref, next;
8834 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
8835 ref; ref = next)
8836 {
8837 next = DF_REF_NEXT_REG (ref);
8838 if (!DF_REF_INSN_INFO (ref))
8839 continue;
8840
8841 /* Make sure the next ref is for a different instruction,
8842 so that we're not affected by the rescan. */
8843 rtx_insn *insn = DF_REF_INSN (ref);
8844 while (next && DF_REF_INSN (next) == insn)
8845 next = DF_REF_NEXT_REG (next);
8846
8847 if (DEBUG_INSN_P (insn))
8848 {
8849 bool changed = false;
8850 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
8851 {
8852 rtx *loc = DF_REF_LOC (ref);
8853 if (*loc == hard_frame_pointer_rtx)
8854 {
8855 *loc = plus_constant (Pmode,
8856 stack_pointer_rtx,
8857 -UNITS_PER_WORD);
8858 changed = true;
8859 }
8860 }
8861 if (changed)
8862 df_insn_rescan (insn);
8863 }
8864 }
8865 }
8866
8867 recompute_frame_layout_p = true;
8868 }
8869 }
8870 else if (crtl->max_used_stack_slot_alignment >= 128
8871 && cfun->machine->stack_frame_required)
8872 {
8873      /* We don't need to realign the stack.  max_used_stack_alignment is
8874	 used to decide how the stack frame should be aligned.  This is
8875	 independent of the psABI and of 32-bit vs 64-bit.  */
8876 cfun->machine->max_used_stack_alignment
8877 = stack_alignment / BITS_PER_UNIT;
8878 }
8879
8880 if (crtl->stack_realign_needed != stack_realign)
8881 recompute_frame_layout_p = true;
8882 crtl->stack_realign_needed = stack_realign;
8883 crtl->stack_realign_finalized = true;
8884 if (recompute_frame_layout_p)
8885 ix86_compute_frame_layout ();
8886}
8887
8888/* Delete SET_GOT right after entry block if it is allocated to reg. */
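
/* The insn looked for and deleted below has roughly this shape (a sketch
   of the 32-bit PIC register setup, with %ebx as a typical destination):

	(parallel [(set (reg:SI bx)
			(unspec:SI [...] UNSPEC_SET_GOT))
		   ...])
 */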
8889
8890static void
8891ix86_elim_entry_set_got (rtx reg)
8892{
8893 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8894 rtx_insn *c_insn = BB_HEAD (bb);
8895 if (!NONDEBUG_INSN_P (c_insn))
8896 c_insn = next_nonnote_nondebug_insn (c_insn);
8897 if (c_insn && NONJUMP_INSN_P (c_insn))
8898 {
8899 rtx pat = PATTERN (insn: c_insn);
8900 if (GET_CODE (pat) == PARALLEL)
8901 {
8902 rtx set = XVECEXP (pat, 0, 0);
8903 if (GET_CODE (set) == SET
8904 && GET_CODE (SET_SRC (set)) == UNSPEC
8905 && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT
8906 && REGNO (SET_DEST (set)) == REGNO (reg))
8907 delete_insn (c_insn);
8908 }
8909 }
8910}
8911
8912static rtx
8913gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
8914{
8915 rtx addr, mem;
8916
8917 if (offset)
8918 addr = plus_constant (Pmode, frame_reg, offset);
8919 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
8920 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
8921}
8922
8923static inline rtx
8924gen_frame_load (rtx reg, rtx frame_reg, int offset)
8925{
8926 return gen_frame_set (reg, frame_reg, offset, store: false);
8927}
8928
8929static inline rtx
8930gen_frame_store (rtx reg, rtx frame_reg, int offset)
8931{
8932 return gen_frame_set (reg, frame_reg, offset, store: true);
8933}
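
/* E.g. (a sketch) gen_frame_store (xmm6, rax, -16) builds roughly

	(set (mem:V4SF (plus:DI (reg:DI ax) (const_int -16)))
	     (reg:V4SF xmm6))

   which is how the ms2sysv save stub's register slots are described
   below.  */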
8934
8935static void
8936ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
8937{
8938 struct machine_function *m = cfun->machine;
8939 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8940 + m->call_ms2sysv_extra_regs;
8941 rtvec v = rtvec_alloc (ncregs + 1);
8942 unsigned int align, i, vi = 0;
8943 rtx_insn *insn;
8944 rtx sym, addr;
8945 rtx rax = gen_rtx_REG (word_mode, AX_REG);
8946 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8947
8948 /* AL should only be live with sysv_abi. */
8949 gcc_assert (!ix86_eax_live_at_start_p ());
8950 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
8951
8952  /* Set up RAX as the stub's base pointer.  We use stack_realign_offset
8953     regardless of whether we've actually realigned the stack or not.  */
8954 align = GET_MODE_ALIGNMENT (V4SFmode);
8955 addr = choose_baseaddr (cfa_offset: frame.stack_realign_offset
8956 + xlogue.get_stub_ptr_offset (), align: &align, AX_REG);
8957 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8958
8959 emit_insn (gen_rtx_SET (rax, addr));
8960
8961 /* Get the stub symbol. */
8962 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
8963 : XLOGUE_STUB_SAVE);
8964 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8965
8966 for (i = 0; i < ncregs; ++i)
8967 {
8968 const xlogue_layout::reginfo &r = xlogue.get_reginfo (reg: i);
8969 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
8970 r.regno);
8971 RTVEC_ELT (v, vi++) = gen_frame_store (reg, frame_reg: rax, offset: -r.offset);
8972 }
8973
8974 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
8975
8976 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
8977 RTX_FRAME_RELATED_P (insn) = true;
8978}
8979
8980/* Generate and return an insn body to AND X with Y. */
8981
8982static rtx_insn *
8983gen_and2_insn (rtx x, rtx y)
8984{
8985 enum insn_code icode = optab_handler (op: and_optab, GET_MODE (x));
8986
8987 gcc_assert (insn_operand_matches (icode, 0, x));
8988 gcc_assert (insn_operand_matches (icode, 1, x));
8989 gcc_assert (insn_operand_matches (icode, 2, y));
8990
8991 return GEN_FCN (icode) (x, x, y);
8992}
8993
8994/* Expand the prologue into a bunch of separate insns. */
8995
8996void
8997ix86_expand_prologue (void)
8998{
8999 struct machine_function *m = cfun->machine;
9000 rtx insn, t;
9001 HOST_WIDE_INT allocate;
9002 bool int_registers_saved;
9003 bool sse_registers_saved;
9004 bool save_stub_call_needed;
9005 rtx static_chain = NULL_RTX;
9006
9007 ix86_last_zero_store_uid = 0;
9008 if (ix86_function_naked (fn: current_function_decl))
9009 {
9010 if (flag_stack_usage_info)
9011 current_function_static_stack_size = 0;
9012 return;
9013 }
9014
9015 ix86_finalize_stack_frame_flags ();
9016
9017 /* DRAP should not coexist with stack_realign_fp */
9018 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9019
9020 memset (s: &m->fs, c: 0, n: sizeof (m->fs));
9021
9022 /* Initialize CFA state for before the prologue. */
9023 m->fs.cfa_reg = stack_pointer_rtx;
9024 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9025
9026 /* Track SP offset to the CFA. We continue tracking this after we've
9027 swapped the CFA register away from SP. In the case of re-alignment
9028     this is fudged; we're interested in offsets within the local frame.  */
9029 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9030 m->fs.sp_valid = true;
9031 m->fs.sp_realigned = false;
9032
9033 const struct ix86_frame &frame = cfun->machine->frame;
9034
9035 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (fn: current_function_decl))
9036 {
9037 /* We should have already generated an error for any use of
9038 ms_hook on a nested function. */
9039 gcc_checking_assert (!ix86_static_chain_on_stack);
9040
9041      /* Check if profiling is active and we shall use the profiling-before-
9042	 prologue variant.  If so, sorry.  */
9043 if (crtl->profile && flag_fentry != 0)
9044 sorry ("%<ms_hook_prologue%> attribute is not compatible "
9045 "with %<-mfentry%> for 32-bit");
9046
9047 /* In ix86_asm_output_function_label we emitted:
9048 8b ff movl.s %edi,%edi
9049 55 push %ebp
9050 8b ec movl.s %esp,%ebp
9051
9052 This matches the hookable function prologue in Win32 API
9053 functions in Microsoft Windows XP Service Pack 2 and newer.
9054 Wine uses this to enable Windows apps to hook the Win32 API
9055 functions provided by Wine.
9056
9057 What that means is that we've already set up the frame pointer. */
9058
9059 if (frame_pointer_needed
9060 && !(crtl->drap_reg && crtl->stack_realign_needed))
9061 {
9062 rtx push, mov;
9063
9064 /* We've decided to use the frame pointer already set up.
9065 Describe this to the unwinder by pretending that both
9066 push and mov insns happen right here.
9067
9068 Putting the unwind info here at the end of the ms_hook
9069 is done so that we can make absolutely certain we get
9070 the required byte sequence at the start of the function,
9071 rather than relying on an assembler that can produce
9072 the exact encoding required.
9073
9074 However it does mean (in the unpatched case) that we have
9075 a 1 insn window where the asynchronous unwind info is
9076 incorrect. However, if we placed the unwind info at
9077 its correct location we would have incorrect unwind info
9078 in the patched case. Which is probably all moot since
9079 I don't expect Wine generates dwarf2 unwind info for the
9080 system libraries that use this feature. */
9081
9082 insn = emit_insn (gen_blockage ());
9083
9084 push = gen_push (hard_frame_pointer_rtx);
9085 mov = gen_rtx_SET (hard_frame_pointer_rtx,
9086 stack_pointer_rtx);
9087 RTX_FRAME_RELATED_P (push) = 1;
9088 RTX_FRAME_RELATED_P (mov) = 1;
9089
9090 RTX_FRAME_RELATED_P (insn) = 1;
9091 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9092 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9093
9094 /* Note that gen_push incremented m->fs.cfa_offset, even
9095 though we didn't emit the push insn here. */
9096 m->fs.cfa_reg = hard_frame_pointer_rtx;
9097 m->fs.fp_offset = m->fs.cfa_offset;
9098 m->fs.fp_valid = true;
9099 }
9100 else
9101 {
9102 /* The frame pointer is not needed so pop %ebp again.
9103 This leaves us with a pristine state. */
9104 emit_insn (gen_pop (hard_frame_pointer_rtx));
9105 }
9106 }
9107
9108 /* The first insn of a function that accepts its static chain on the
9109 stack is to push the register that would be filled in by a direct
9110 call. This insn will be skipped by the trampoline. */
9111 else if (ix86_static_chain_on_stack)
9112 {
9113 static_chain = ix86_static_chain (cfun->decl, false);
9114 insn = emit_insn (gen_push (arg: static_chain));
9115 emit_insn (gen_blockage ());
9116
9117 /* We don't want to interpret this push insn as a register save,
9118 only as a stack adjustment. The real copy of the register as
9119 a save will be done later, if needed. */
9120 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
9121 t = gen_rtx_SET (stack_pointer_rtx, t);
9122 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9123 RTX_FRAME_RELATED_P (insn) = 1;
9124 }
9125
9126  /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9127     DRAP is needed and stack realignment is really needed after reload.  */
9128 if (stack_realign_drap)
9129 {
9130 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9131
9132 /* Can't use DRAP in interrupt function. */
9133 if (cfun->machine->func_type != TYPE_NORMAL)
9134 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
9135 "in interrupt service routine. This may be worked "
9136 "around by avoiding functions with aggregate return.");
9137
9138 /* Only need to push parameter pointer reg if it is caller saved. */
9139 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9140 {
9141 /* Push arg pointer reg */
9142 insn = emit_insn (gen_push (crtl->drap_reg));
9143 RTX_FRAME_RELATED_P (insn) = 1;
9144 }
9145
9146 /* Grab the argument pointer. */
9147 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
9148 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
9149 RTX_FRAME_RELATED_P (insn) = 1;
9150 m->fs.cfa_reg = crtl->drap_reg;
9151 m->fs.cfa_offset = 0;
9152
9153 /* Align the stack. */
9154 insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
9155 GEN_INT (-align_bytes)));
9156 RTX_FRAME_RELATED_P (insn) = 1;
9157
9158 /* Replicate the return address on the stack so that return
9159 address can be reached via (argp - 1) slot. This is needed
9160 to implement macro RETURN_ADDR_RTX and intrinsic function
9161 expand_builtin_return_addr etc. */
9162 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
9163 t = gen_frame_mem (word_mode, t);
9164 insn = emit_insn (gen_push (t));
9165 RTX_FRAME_RELATED_P (insn) = 1;
9166
9167 /* For the purposes of frame and register save area addressing,
9168 we've started over with a new frame. */
9169 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9170 m->fs.realigned = true;
9171
9172 if (static_chain)
9173 {
9174 /* Replicate static chain on the stack so that static chain
9175 can be reached via (argp - 2) slot. This is needed for
9176 nested function with stack realignment. */
9177 insn = emit_insn (gen_push (static_chain));
9178 RTX_FRAME_RELATED_P (insn) = 1;
9179 }
9180 }
9181
9182 int_registers_saved = (frame.nregs == 0);
9183 sse_registers_saved = (frame.nsseregs == 0);
9184 save_stub_call_needed = (m->call_ms2sysv);
9185 gcc_assert (sse_registers_saved || !save_stub_call_needed);
9186
9187 if (frame_pointer_needed && !m->fs.fp_valid)
9188 {
9189 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9190 slower on all targets. Also sdb didn't like it. */
9191 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9192 RTX_FRAME_RELATED_P (insn) = 1;
9193
9194 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
9195 {
9196 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9197 RTX_FRAME_RELATED_P (insn) = 1;
9198
9199 if (m->fs.cfa_reg == stack_pointer_rtx)
9200 m->fs.cfa_reg = hard_frame_pointer_rtx;
9201 m->fs.fp_offset = m->fs.sp_offset;
9202 m->fs.fp_valid = true;
9203 }
9204 }
9205
9206 if (!int_registers_saved)
9207 {
9208 /* If saving registers via PUSH, do so now. */
9209 if (!frame.save_regs_using_mov)
9210 {
9211 ix86_emit_save_regs ();
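/* Record that PPX-hinted pushes were used so the epilogue emits the
   matching POPP/POP2P restores instead of MOV reloads.  */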
9212 m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
9213 int_registers_saved = true;
9214 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9215 }
9216
9217 /* When using red zone we may start register saving before allocating
9218 the stack frame saving one cycle of the prologue. However, avoid
9219 doing this if we have to probe the stack; at least on x86_64 the
9220 stack probe can turn into a call that clobbers a red zone location. */
9221 else if (ix86_using_red_zone ()
9222 && (! TARGET_STACK_PROBE
9223 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9224 {
9225 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9226 cfun->machine->red_zone_used = true;
9227 int_registers_saved = true;
9228 }
9229 }
9230
9231 if (frame.red_zone_size != 0)
9232 cfun->machine->red_zone_used = true;
9233
9234 if (stack_realign_fp)
9235 {
9236 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9237 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9238
9239 /* Record last valid frame pointer offset. */
9240 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
9241
9242 /* The computation of the size of the re-aligned stack frame means
9243 that we must allocate the size of the register save area before
9244 performing the actual alignment. Otherwise we cannot guarantee
9245 that there's enough storage above the realignment point. */
9246 allocate = frame.reg_save_offset - m->fs.sp_offset
9247 + frame.stack_realign_allocate;
9248 if (allocate)
9249 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9250 GEN_INT (-allocate), -1, false);
9251
9252 /* Align the stack. */
9253 emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
9254 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
9255 m->fs.sp_realigned_offset = m->fs.sp_offset
9256 - frame.stack_realign_allocate;
9257 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
9258 Beyond this point, stack access should be done via choose_baseaddr or
9259 by using sp_valid_at and fp_valid_at to determine the correct base
9260 register. Henceforth, any CFA offset should be thought of as logical
9261 and not physical. */
9262 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
9263 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
9264 m->fs.sp_realigned = true;
9265
9266 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
9267 is needed to describe where a register is saved using a realigned
9268 stack pointer, so we need to invalidate the stack pointer for that
9269 target. */
9270 if (TARGET_SEH)
9271 m->fs.sp_valid = false;
9272
9273 /* If SP offset is non-immediate after allocation of the stack frame,
9274 then emit SSE saves or stub call prior to allocating the rest of the
9275 stack frame. This is less efficient for the out-of-line stub because
9276 we can't combine allocations across the call barrier, but it's better
9277 than using a scratch register. */
9278 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
9279 - m->fs.sp_realigned_offset),
9280 Pmode))
9281 {
9282 if (!sse_registers_saved)
9283 {
9284 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9285 sse_registers_saved = true;
9286 }
9287 else if (save_stub_call_needed)
9288 {
9289 ix86_emit_outlined_ms2sysv_save (frame);
9290 save_stub_call_needed = false;
9291 }
9292 }
9293 }
9294
9295 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9296
9297 if (flag_stack_usage_info)
9298 {
9299 /* We start to count from ARG_POINTER. */
9300 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9301
9302 /* If it was realigned, take into account the fake frame. */
9303 if (stack_realign_drap)
9304 {
9305 if (ix86_static_chain_on_stack)
9306 stack_size += UNITS_PER_WORD;
9307
9308 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9309 stack_size += UNITS_PER_WORD;
9310
9311 /* This over-estimates by 1 minimal-stack-alignment-unit but
9312 mitigates that by counting in the new return address slot. */
9313 current_function_dynamic_stack_size
9314 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9315 }
9316
9317 current_function_static_stack_size = stack_size;
9318 }
9319
9320 /* On SEH target with very large frame size, allocate an area to save
9321 SSE registers (as the very large allocation won't be described). */
9322 if (TARGET_SEH
9323 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
9324 && !sse_registers_saved)
9325 {
9326 HOST_WIDE_INT sse_size
9327 = frame.sse_reg_save_offset - frame.reg_save_offset;
9328
9329 gcc_assert (int_registers_saved);
9330
9331 /* No need to do stack checking as the area will be immediately
9332 written. */
9333 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9334 GEN_INT (-sse_size), -1,
9335 m->fs.cfa_reg == stack_pointer_rtx);
9336 allocate -= sse_size;
9337 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9338 sse_registers_saved = true;
9339 }
9340
9341 /* If stack clash protection is requested, then probe the stack, unless it
9342 is already probed on the target. */
9343 if (allocate >= 0
9344 && flag_stack_clash_protection
9345 && !ix86_target_stack_probe ())
9346 {
9347 ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
9348 allocate = 0;
9349 }
9350
9351 /* The stack has already been decremented by the instruction calling us
9352 so probe if the size is non-negative to preserve the protection area. */
9353 else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9354 {
9355 const HOST_WIDE_INT probe_interval = get_probe_interval ();
9356
9357 if (STACK_CHECK_MOVING_SP)
9358 {
9359 if (crtl->is_leaf
9360 && !cfun->calls_alloca
9361 && allocate <= probe_interval)
9362 ;
9363
9364 else
9365 {
9366 ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
9367 allocate = 0;
9368 }
9369 }
9370
9371 else
9372 {
9373 HOST_WIDE_INT size = allocate;
9374
9375 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
9376 size = 0x80000000 - get_stack_check_protect () - 1;
9377
9378 if (TARGET_STACK_PROBE)
9379 {
9380 if (crtl->is_leaf && !cfun->calls_alloca)
9381 {
9382 if (size > probe_interval)
9383 ix86_emit_probe_stack_range (0, size, int_registers_saved);
9384 }
9385 else
9386 ix86_emit_probe_stack_range (0,
9387 size + get_stack_check_protect (),
9388 int_registers_saved);
9389 }
9390 else
9391 {
9392 if (crtl->is_leaf && !cfun->calls_alloca)
9393 {
9394 if (size > probe_interval
9395 && size > get_stack_check_protect ())
9396 ix86_emit_probe_stack_range (get_stack_check_protect (),
9397 (size
9398 - get_stack_check_protect ()),
9399 int_registers_saved);
9400 }
9401 else
9402 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
9403 int_registers_saved);
9404 }
9405 }
9406 }
9407
9408 if (allocate == 0)
9409 ;
9410 else if (!ix86_target_stack_probe ()
9411 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9412 {
9413 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9414 GEN_INT (-allocate), -1,
9415 m->fs.cfa_reg == stack_pointer_rtx);
9416 }
9417 else
9418 {
9419 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9420 rtx r10 = NULL;
9421 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
9422 bool eax_live = ix86_eax_live_at_start_p ();
9423 bool r10_live = false;
9424
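/* On 64-bit targets the static chain is passed in R10, so it must
   survive the stack probe call emitted below.  */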
9425 if (TARGET_64BIT)
9426 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9427
9428 if (eax_live)
9429 {
9430 insn = emit_insn (gen_push (eax));
9431 allocate -= UNITS_PER_WORD;
9432 /* Note that SEH directives need to continue tracking the stack
9433 pointer even after the frame pointer has been set up. */
9434 if (sp_is_cfa_reg || TARGET_SEH)
9435 {
9436 if (sp_is_cfa_reg)
9437 m->fs.cfa_offset += UNITS_PER_WORD;
9438 RTX_FRAME_RELATED_P (insn) = 1;
9439 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9440 gen_rtx_SET (stack_pointer_rtx,
9441 plus_constant (Pmode,
9442 stack_pointer_rtx,
9443 -UNITS_PER_WORD)));
9444 }
9445 }
9446
9447 if (r10_live)
9448 {
9449 r10 = gen_rtx_REG (Pmode, R10_REG);
9450 insn = emit_insn (gen_push (r10));
9451 allocate -= UNITS_PER_WORD;
9452 if (sp_is_cfa_reg || TARGET_SEH)
9453 {
9454 if (sp_is_cfa_reg)
9455 m->fs.cfa_offset += UNITS_PER_WORD;
9456 RTX_FRAME_RELATED_P (insn) = 1;
9457 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9458 gen_rtx_SET (stack_pointer_rtx,
9459 plus_constant (Pmode,
9460 stack_pointer_rtx,
9461 -UNITS_PER_WORD)));
9462 }
9463 }
9464
9465 emit_move_insn (eax, GEN_INT (allocate));
9466 emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
9467
9468 /* Use the fact that AX still contains ALLOCATE. */
9469 insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
9470 (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
9471
9472 if (sp_is_cfa_reg || TARGET_SEH)
9473 {
9474 if (sp_is_cfa_reg)
9475 m->fs.cfa_offset += allocate;
9476 RTX_FRAME_RELATED_P (insn) = 1;
9477 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9478 gen_rtx_SET (stack_pointer_rtx,
9479 plus_constant (Pmode, stack_pointer_rtx,
9480 -allocate)));
9481 }
9482 m->fs.sp_offset += allocate;
9483
9484 /* Use stack_pointer_rtx for relative addressing so that code works for
9485 realigned stack. But this means that we need a blockage to prevent
9486 stores based on the frame pointer from being scheduled before. */
9487 if (r10_live && eax_live)
9488 {
9489 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9490 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9491 gen_frame_mem (word_mode, t));
9492 t = plus_constant (Pmode, t, UNITS_PER_WORD);
9493 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
9494 gen_frame_mem (word_mode, t));
9495 emit_insn (gen_memory_blockage ());
9496 }
9497 else if (eax_live || r10_live)
9498 {
9499 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9500 emit_move_insn (gen_rtx_REG (word_mode,
9501 (eax_live ? AX_REG : R10_REG)),
9502 gen_frame_mem (word_mode, t));
9503 emit_insn (gen_memory_blockage ());
9504 }
9505 }
9506 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9507
9508 /* If we haven't already set up the frame pointer, do so now. */
9509 if (frame_pointer_needed && !m->fs.fp_valid)
9510 {
9511 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
9512 GEN_INT (frame.stack_pointer_offset
9513 - frame.hard_frame_pointer_offset));
9514 insn = emit_insn (insn);
9515 RTX_FRAME_RELATED_P (insn) = 1;
9516 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
9517
9518 if (m->fs.cfa_reg == stack_pointer_rtx)
9519 m->fs.cfa_reg = hard_frame_pointer_rtx;
9520 m->fs.fp_offset = frame.hard_frame_pointer_offset;
9521 m->fs.fp_valid = true;
9522 }
9523
9524 if (!int_registers_saved)
9525 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9526 if (!sse_registers_saved)
9527 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9528 else if (save_stub_call_needed)
9529 ix86_emit_outlined_ms2sysv_save (frame);
9530
9531 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
9532 in PROLOGUE. */
9533 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
9534 {
9535 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
9536 insn = emit_insn (gen_set_got (pic));
9537 RTX_FRAME_RELATED_P (insn) = 1;
9538 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
9539 emit_insn (gen_prologue_use (pic));
9540 /* Delete an already emitted SET_GOT if it exists and was allocated to
9541 REAL_PIC_OFFSET_TABLE_REGNUM. */
9542 ix86_elim_entry_set_got (pic);
9543 }
9544
9545 if (crtl->drap_reg && !crtl->stack_realign_needed)
9546 {
9547 /* vDRAP is set up, but after reload it turns out stack realignment
9548 isn't necessary; here we emit the prologue to set up DRAP
9549 without the stack realignment adjustment. */
9550 t = choose_baseaddr (0, NULL);
9551 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
9552 }
9553
9554 /* Prevent instructions from being scheduled into register save push
9555 sequence when access to the redzone area is done through frame pointer.
9556 The offset between the frame pointer and the stack pointer is calculated
9557 relative to the value of the stack pointer at the end of the function
9558 prologue, and moving instructions that access redzone area via frame
9559 pointer inside push sequence violates this assumption. */
9560 if (frame_pointer_needed && frame.red_zone_size)
9561 emit_insn (gen_memory_blockage ());
9562
9563 /* SEH requires that the prologue end within 256 bytes of the start of
9564 the function. Prevent instruction schedules that would extend that.
9565 Further, prevent alloca modifications to the stack pointer from being
9566 combined with prologue modifications. */
9567 if (TARGET_SEH)
9568 emit_insn (gen_prologue_use (stack_pointer_rtx));
9569}
9570
9571/* Emit code to restore REG using a POP or POPP insn. */
9572
9573static void
9574ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
9575{
9576 struct machine_function *m = cfun->machine;
9577 rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));
9578
9579 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9580 m->fs.sp_offset -= UNITS_PER_WORD;
9581
9582 if (m->fs.cfa_reg == crtl->drap_reg
9583 && REGNO (reg) == REGNO (crtl->drap_reg))
9584 {
9585 /* Previously we'd represented the CFA as an expression
9586 like *(%ebp - 8). We've just popped that value from
9587 the stack, which means we need to reset the CFA to
9588 the drap register. This will remain until we restore
9589 the stack pointer. */
9590 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9591 RTX_FRAME_RELATED_P (insn) = 1;
9592
9593 /* This means that the DRAP register is valid for addressing too. */
9594 m->fs.drap_valid = true;
9595 return;
9596 }
9597
9598 if (m->fs.cfa_reg == stack_pointer_rtx)
9599 {
9600 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9601 x = gen_rtx_SET (stack_pointer_rtx, x);
9602 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9603 RTX_FRAME_RELATED_P (insn) = 1;
9604
9605 m->fs.cfa_offset -= UNITS_PER_WORD;
9606 }
9607
9608 /* When the frame pointer is the CFA, and we pop it, we are
9609 swapping back to the stack pointer as the CFA. This happens
9610 for stack frames that don't allocate other data, so we assume
9611 the stack pointer is now pointing at the return address, i.e.
9612 the function entry state, which makes the offset be 1 word. */
9613 if (reg == hard_frame_pointer_rtx)
9614 {
9615 m->fs.fp_valid = false;
9616 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9617 {
9618 m->fs.cfa_reg = stack_pointer_rtx;
9619 m->fs.cfa_offset -= UNITS_PER_WORD;
9620
9621 add_reg_note (insn, REG_CFA_DEF_CFA,
9622 plus_constant (Pmode, stack_pointer_rtx,
9623 m->fs.cfa_offset));
9624 RTX_FRAME_RELATED_P (insn) = 1;
9625 }
9626 }
9627}
9628
9629/* Emit code to restore REG using a POP2 insn. */
9630static void
9631ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
9632{
9633 struct machine_function *m = cfun->machine;
9634 const int offset = UNITS_PER_WORD * 2;
9635 rtx_insn *insn;
9636
9637 rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
9638 stack_pointer_rtx));
9639
9640 if (ppx_p)
9641 insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
9642 else
9643 insn = emit_insn (gen_pop2_di (reg1, mem, reg2));
9644
9645 RTX_FRAME_RELATED_P (insn) = 1;
9646
9647 rtx dwarf = NULL_RTX;
9648 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
9649 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
9650 REG_NOTES (insn) = dwarf;
9651 m->fs.sp_offset -= offset;
9652
9653 if (m->fs.cfa_reg == crtl->drap_reg
9654 && (REGNO (reg1) == REGNO (crtl->drap_reg)
9655 || REGNO (reg2) == REGNO (crtl->drap_reg)))
9656 {
9657 /* Previously we'd represented the CFA as an expression
9658 like *(%ebp - 8). We've just popped that value from
9659 the stack, which means we need to reset the CFA to
9660 the drap register. This will remain until we restore
9661 the stack pointer. */
9662 add_reg_note (insn, REG_CFA_DEF_CFA,
9663 REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
9664 RTX_FRAME_RELATED_P (insn) = 1;
9665
9666 /* This means that the DRAP register is valid for addressing too. */
9667 m->fs.drap_valid = true;
9668 return;
9669 }
9670
9671 if (m->fs.cfa_reg == stack_pointer_rtx)
9672 {
9673 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
9674 x = gen_rtx_SET (stack_pointer_rtx, x);
9675 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9676 RTX_FRAME_RELATED_P (insn) = 1;
9677
9678 m->fs.cfa_offset -= offset;
9679 }
9680
9681 /* When the frame pointer is the CFA, and we pop it, we are
9682 swapping back to the stack pointer as the CFA. This happens
9683 for stack frames that don't allocate other data, so we assume
9684 the stack pointer is now pointing at the return address, i.e.
9685 the function entry state, which makes the offset be 1 word. */
9686 if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
9687 {
9688 m->fs.fp_valid = false;
9689 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9690 {
9691 m->fs.cfa_reg = stack_pointer_rtx;
9692 m->fs.cfa_offset -= offset;
9693
9694 add_reg_note (insn, REG_CFA_DEF_CFA,
9695 plus_constant (Pmode, stack_pointer_rtx,
9696 m->fs.cfa_offset));
9697 RTX_FRAME_RELATED_P (insn) = 1;
9698 }
9699 }
9700}
9701
9702/* Emit code to restore saved registers using POP insns. */
9703
9704static void
9705ix86_emit_restore_regs_using_pop (bool ppx_p)
9706{
9707 unsigned int regno;
9708
9709 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9710 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
9711 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
9712}
9713
9714/* Emit code to restore saved registers using POP2 insns. */
9715
9716static void
9717ix86_emit_restore_regs_using_pop2 (void)
9718{
9719 int regno;
9720 int regno_list[2];
9721 regno_list[0] = regno_list[1] = -1;
9722 int loaded_regnum = 0;
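/* POP2 restores two registers with a single 16-byte (TImode) pop, so
   registers are paired only while the stack pointer is 16-byte aligned;
   otherwise a single POP below restores the alignment first.  */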
9723 bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
9724
9725 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9726 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
9727 {
9728 if (aligned)
9729 {
9730 regno_list[loaded_regnum++] = regno;
9731 if (loaded_regnum == 2)
9732 {
9733 gcc_assert (regno_list[0] != -1
9734 && regno_list[1] != -1
9735 && regno_list[0] != regno_list[1]);
9736
9737 ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode,
9738 regno_list[0]),
9739 gen_rtx_REG (word_mode,
9740 regno_list[1]),
9741 TARGET_APX_PPX);
9742 loaded_regnum = 0;
9743 regno_list[0] = regno_list[1] = -1;
9744 }
9745 }
9746 else
9747 {
9748 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno),
9749 TARGET_APX_PPX);
9750 aligned = true;
9751 }
9752 }
9753
9754 if (loaded_regnum == 1)
9755 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]),
9756 TARGET_APX_PPX);
9757}
9758
9759/* Emit code and notes for the LEAVE instruction. If insn is non-null,
9760 omits the emit and only attaches the notes. */
9761
9762static void
9763ix86_emit_leave (rtx_insn *insn)
9764{
9765 struct machine_function *m = cfun->machine;
9766
9767 if (!insn)
9768 insn = emit_insn (gen_leave (word_mode));
9769
9770 ix86_add_queued_cfa_restore_notes (insn);
9771
9772 gcc_assert (m->fs.fp_valid);
9773 m->fs.sp_valid = true;
9774 m->fs.sp_realigned = false;
9775 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9776 m->fs.fp_valid = false;
9777
9778 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9779 {
9780 m->fs.cfa_reg = stack_pointer_rtx;
9781 m->fs.cfa_offset = m->fs.sp_offset;
9782
9783 add_reg_note (insn, REG_CFA_DEF_CFA,
9784 plus_constant (Pmode, stack_pointer_rtx,
9785 m->fs.sp_offset));
9786 RTX_FRAME_RELATED_P (insn) = 1;
9787 }
9788 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9789 m->fs.fp_offset);
9790}
9791
9792/* Emit code to restore saved registers using MOV insns.
9793 First register is restored from CFA - CFA_OFFSET. */
9794static void
9795ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9796 bool maybe_eh_return)
9797{
9798 struct machine_function *m = cfun->machine;
9799 unsigned int regno;
9800
9801 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9802 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9803 {
9804 rtx reg = gen_rtx_REG (word_mode, regno);
9805 rtx mem;
9806 rtx_insn *insn;
9807
9808 mem = choose_baseaddr (cfa_offset, NULL);
9809 mem = gen_frame_mem (word_mode, mem);
9810 insn = emit_move_insn (reg, mem);
9811
9812 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9813 {
9814 /* Previously we'd represented the CFA as an expression
9815 like *(%ebp - 8). We've just popped that value from
9816 the stack, which means we need to reset the CFA to
9817 the drap register. This will remain until we restore
9818 the stack pointer. */
9819 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9820 RTX_FRAME_RELATED_P (insn) = 1;
9821
9822 /* This means that the DRAP register is valid for addressing. */
9823 m->fs.drap_valid = true;
9824 }
9825 else
9826 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9827
9828 cfa_offset -= UNITS_PER_WORD;
9829 }
9830}
9831
9832 /* Emit code to restore saved SSE registers using MOV insns.
9833 First register is restored from CFA - CFA_OFFSET. */
9834static void
9835ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9836 bool maybe_eh_return)
9837{
9838 unsigned int regno;
9839
9840 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9841 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9842 {
9843 rtx reg = gen_rtx_REG (V4SFmode, regno);
9844 rtx mem;
9845 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
9846
9847 mem = choose_baseaddr (cfa_offset, &align);
9848 mem = gen_rtx_MEM (V4SFmode, mem);
9849
9850 /* The location alignment depends upon the base register. */
9851 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
9852 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
9853 set_mem_align (mem, align);
9854 emit_insn (gen_rtx_SET (reg, mem));
9855
9856 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9857
9858 cfa_offset -= GET_MODE_SIZE (V4SFmode);
9859 }
9860}
9861
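/* Emit the out-of-line ms2sysv restore sequence: set up RSI as the stub's
   base pointer, then either call the restore stub or tail-jump to it,
   attaching CFA restore notes for the registers it reloads.  */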
9862static void
9863ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
9864 bool use_call, int style)
9865{
9866 struct machine_function *m = cfun->machine;
9867 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
9868 + m->call_ms2sysv_extra_regs;
9869 rtvec v;
9870 unsigned int elems_needed, align, i, vi = 0;
9871 rtx_insn *insn;
9872 rtx sym, tmp;
9873 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
9874 rtx r10 = NULL_RTX;
9875 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
9876 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
9877 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
9878 rtx rsi_frame_load = NULL_RTX;
9879 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
9880 enum xlogue_stub stub;
9881
9882 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
9883
9884 /* If using a realigned stack, we should never start with padding. */
9885 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
9886
9887 /* Setup RSI as the stub's base pointer. */
9888 align = GET_MODE_ALIGNMENT (V4SFmode);
9889 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
9890 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
9891
9892 emit_insn (gen_rtx_SET (rsi, tmp));
9893
9894 /* Get a symbol for the stub. */
9895 if (frame_pointer_needed)
9896 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
9897 : XLOGUE_STUB_RESTORE_HFP_TAIL;
9898 else
9899 stub = use_call ? XLOGUE_STUB_RESTORE
9900 : XLOGUE_STUB_RESTORE_TAIL;
9901 sym = xlogue.get_stub_rtx (stub);
9902
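/* Besides the NCREGS frame loads, the PARALLEL needs one element for the
   USE of the stub symbol when calling, or, for the tail-jump form, the
   return, the USE, and the stack (and frame) pointer restore elements
   built below.  */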
9903 elems_needed = ncregs;
9904 if (use_call)
9905 elems_needed += 1;
9906 else
9907 elems_needed += frame_pointer_needed ? 5 : 3;
9908 v = rtvec_alloc (elems_needed);
9909
9910 /* We call the epilogue stub when we need to pop incoming args or we are
9911 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
9912 epilogue stub and it is the tail-call. */
9913 if (use_call)
9914 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9915 else
9916 {
9917 RTVEC_ELT (v, vi++) = ret_rtx;
9918 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9919 if (frame_pointer_needed)
9920 {
9921 rtx rbp = gen_rtx_REG (DImode, BP_REG);
9922 gcc_assert (m->fs.fp_valid);
9923 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
9924
9925 tmp = plus_constant (DImode, rbp, 8);
9926 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
9927 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
9928 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
9929 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
9930 }
9931 else
9932 {
9933 /* If no hard frame pointer, we set R10 to the SP restore value. */
9934 gcc_assert (!m->fs.fp_valid);
9935 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9936 gcc_assert (m->fs.sp_valid);
9937
9938 r10 = gen_rtx_REG (DImode, R10_REG);
9939 tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
9940 emit_insn (gen_rtx_SET (r10, tmp));
9941
9942 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
9943 }
9944 }
9945
9946 /* Generate frame load insns and restore notes. */
9947 for (i = 0; i < ncregs; ++i)
9948 {
9949 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
9950 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
9951 rtx reg, frame_load;
9952
9953 reg = gen_rtx_REG (mode, r.regno);
9954 frame_load = gen_frame_load (reg, rsi, r.offset);
9955
9956 /* Save RSI frame load insn & note to add last. */
9957 if (r.regno == SI_REG)
9958 {
9959 gcc_assert (!rsi_frame_load);
9960 rsi_frame_load = frame_load;
9961 rsi_restore_offset = r.offset;
9962 }
9963 else
9964 {
9965 RTVEC_ELT (v, vi++) = frame_load;
9966 ix86_add_cfa_restore_note (NULL, reg, r.offset);
9967 }
9968 }
9969
9970 /* Add RSI frame load & restore note at the end. */
9971 gcc_assert (rsi_frame_load);
9972 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
9973 RTVEC_ELT (v, vi++) = rsi_frame_load;
9974 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
9975 rsi_restore_offset);
9976
9977 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
9978 if (!use_call && !frame_pointer_needed)
9979 {
9980 gcc_assert (m->fs.sp_valid);
9981 gcc_assert (!m->fs.sp_realigned);
9982
9983 /* At this point, R10 should point to frame.stack_realign_offset. */
9984 if (m->fs.cfa_reg == stack_pointer_rtx)
9985 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
9986 m->fs.sp_offset = frame.stack_realign_offset;
9987 }
9988
9989 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
9990 tmp = gen_rtx_PARALLEL (VOIDmode, v);
9991 if (use_call)
9992 insn = emit_insn (tmp);
9993 else
9994 {
9995 insn = emit_jump_insn (tmp);
9996 JUMP_LABEL (insn) = ret_rtx;
9997
9998 if (frame_pointer_needed)
9999 ix86_emit_leave (insn);
10000 else
10001 {
10002 /* Need CFA adjust note. */
10003 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
10004 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
10005 }
10006 }
10007
10008 RTX_FRAME_RELATED_P (insn) = true;
10009 ix86_add_queued_cfa_restore_notes (insn);
10010
10011 /* If we're not doing a tail-call, we need to adjust the stack. */
10012 if (use_call && m->fs.sp_valid)
10013 {
10014 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
10015 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10016 GEN_INT (dealloc), style,
10017 m->fs.cfa_reg == stack_pointer_rtx);
10018 }
10019}
10020
10021/* Restore function stack, frame, and registers. */
10022
10023void
10024ix86_expand_epilogue (int style)
10025{
10026 struct machine_function *m = cfun->machine;
10027 struct machine_frame_state frame_state_save = m->fs;
10028 bool restore_regs_via_mov;
10029 bool using_drap;
10030 bool restore_stub_is_tail = false;
10031
10032 if (ix86_function_naked (current_function_decl))
10033 {
10034 /* The program should not reach this point. */
10035 emit_insn (gen_ud2 ());
10036 return;
10037 }
10038
10039 ix86_finalize_stack_frame_flags ();
10040 const struct ix86_frame &frame = cfun->machine->frame;
10041
10042 m->fs.sp_realigned = stack_realign_fp;
10043 m->fs.sp_valid = stack_realign_fp
10044 || !frame_pointer_needed
10045 || crtl->sp_is_unchanging;
10046 gcc_assert (!m->fs.sp_valid
10047 || m->fs.sp_offset == frame.stack_pointer_offset);
10048
10049 /* The FP must be valid if the frame pointer is present. */
10050 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10051 gcc_assert (!m->fs.fp_valid
10052 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10053
10054 /* We must have *some* valid pointer to the stack frame. */
10055 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10056
10057 /* The DRAP is never valid at this point. */
10058 gcc_assert (!m->fs.drap_valid);
10059
10060 /* See the comment about red zone and frame
10061 pointer usage in ix86_expand_prologue. */
10062 if (frame_pointer_needed && frame.red_zone_size)
10063 emit_insn (gen_memory_blockage ());
10064
10065 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10066 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10067
10068 /* Determine the CFA offset of the end of the red-zone. */
10069 m->fs.red_zone_offset = 0;
10070 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10071 {
10072 /* The red-zone begins below return address and error code in
10073 exception handler. */
10074 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
10075
10076 /* When the register save area is in the aligned portion of
10077 the stack, determine the maximum runtime displacement that
10078 matches up with the aligned frame. */
10079 if (stack_realign_drap)
10080 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10081 + UNITS_PER_WORD);
10082 }
10083
10084 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
10085
10086 /* Special care must be taken for the normal return case of a function
10087 using eh_return: the eax and edx registers are marked as saved, but
10088 not restored along this path. Adjust the save location to match. */
10089 if (crtl->calls_eh_return && style != 2)
10090 reg_save_offset -= 2 * UNITS_PER_WORD;
10091
10092 /* EH_RETURN requires the use of moves to function properly. */
10093 if (crtl->calls_eh_return)
10094 restore_regs_via_mov = true;
10095 /* SEH requires the use of pops to identify the epilogue. */
10096 else if (TARGET_SEH)
10097 restore_regs_via_mov = false;
10098 /* If we already saved registers with PPX-hinted pushes, don't use moves in the epilogue. */
10099 else if (m->fs.apx_ppx_used)
10100 restore_regs_via_mov = false;
10101 /* If we're only restoring one register and sp cannot be used, then
10102 use a move instruction to restore the register, since it's
10103 less work than reloading sp and popping the register. */
10104 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
10105 restore_regs_via_mov = true;
10106 else if (TARGET_EPILOGUE_USING_MOVE
10107 && cfun->machine->use_fast_prologue_epilogue
10108 && (frame.nregs > 1
10109 || m->fs.sp_offset != reg_save_offset))
10110 restore_regs_via_mov = true;
10111 else if (frame_pointer_needed
10112 && !frame.nregs
10113 && m->fs.sp_offset != reg_save_offset)
10114 restore_regs_via_mov = true;
10115 else if (frame_pointer_needed
10116 && TARGET_USE_LEAVE
10117 && cfun->machine->use_fast_prologue_epilogue
10118 && frame.nregs == 1)
10119 restore_regs_via_mov = true;
10120 else
10121 restore_regs_via_mov = false;
10122
10123 if (restore_regs_via_mov || frame.nsseregs)
10124 {
10125 /* Ensure that the entire register save area is addressable via
10126 the stack pointer, if we will restore SSE regs via sp. */
10127 if (TARGET_64BIT
10128 && m->fs.sp_offset > 0x7fffffff
10129 && sp_valid_at (frame.stack_realign_offset + 1)
10130 && (frame.nsseregs + frame.nregs) != 0)
10131 {
10132 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10133 GEN_INT (m->fs.sp_offset
10134 - frame.sse_reg_save_offset),
10135 style,
10136 m->fs.cfa_reg == stack_pointer_rtx);
10137 }
10138 }
10139
10140 /* If there are any SSE registers to restore, then we have to do it
10141 via moves, since there's obviously no pop for SSE regs. */
10142 if (frame.nsseregs)
10143 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10144 style == 2);
10145
10146 if (m->call_ms2sysv)
10147 {
10148 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
10149
10150 /* We cannot use a tail-call for the stub if:
10151 1. We have to pop incoming args,
10152 2. We have additional int regs to restore, or
10153 3. A sibling call will be the tail-call, or
10154 4. We are emitting an eh_return_internal epilogue.
10155
10156 TODO: Item 4 has not yet been tested!
10157
10158 If any of the above are true, we will call the stub rather than
10159 jump to it. */
10160 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
10161 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
10162 }
10163
10164 /* If using an out-of-line stub that is a tail-call, then... */
10165 if (m->call_ms2sysv && restore_stub_is_tail)
10166 {
10167 /* TODO: paranoid tests. (remove eventually) */
10168 gcc_assert (m->fs.sp_valid);
10169 gcc_assert (!m->fs.sp_realigned);
10170 gcc_assert (!m->fs.fp_valid);
10171 gcc_assert (!m->fs.realigned);
10172 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
10173 gcc_assert (!crtl->drap_reg);
10174 gcc_assert (!frame.nregs);
10175 }
10176 else if (restore_regs_via_mov)
10177 {
10178 rtx t;
10179
10180 if (frame.nregs)
10181 ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
10182
10183 /* eh_return epilogues need %ecx added to the stack pointer. */
10184 if (style == 2)
10185 {
10186 rtx sa = EH_RETURN_STACKADJ_RTX;
10187 rtx_insn *insn;
10188
10189 /* Stack realignment doesn't work with eh_return. */
10190 if (crtl->stack_realign_needed)
10191 sorry ("Stack realignment not supported with "
10192 "%<__builtin_eh_return%>");
10193
10194 /* regparm nested functions don't work with eh_return. */
10195 if (ix86_static_chain_on_stack)
10196 sorry ("regparm nested function not supported with "
10197 "%<__builtin_eh_return%>");
10198
10199 if (frame_pointer_needed)
10200 {
10201 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10202 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
10203 emit_insn (gen_rtx_SET (sa, t));
10204
10205 /* NB: eh_return epilogues must restore the frame pointer
10206 in word_mode since the upper 32 bits of RBP register
10207 can have any values. */
10208 t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
10209 rtx frame_reg = gen_rtx_REG (word_mode,
10210 HARD_FRAME_POINTER_REGNUM);
10211 insn = emit_move_insn (frame_reg, t);
10212
10213 /* Note that we use SA as a temporary CFA, as the return
10214 address is at the proper place relative to it. We
10215 pretend this happens at the FP restore insn because
10216 prior to this insn the FP would be stored at the wrong
10217 offset relative to SA, and after this insn we have no
10218 other reasonable register to use for the CFA. We don't
10219 bother resetting the CFA to the SP for the duration of
10220 the return insn, unless the control flow instrumentation
10221 is done. In this case the SP is used later and we have
10222 to reset CFA to SP. */
10223 add_reg_note (insn, REG_CFA_DEF_CFA,
10224 plus_constant (Pmode, sa, UNITS_PER_WORD));
10225 ix86_add_queued_cfa_restore_notes (insn);
10226 add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
10227 RTX_FRAME_RELATED_P (insn) = 1;
10228
10229 m->fs.cfa_reg = sa;
10230 m->fs.cfa_offset = UNITS_PER_WORD;
10231 m->fs.fp_valid = false;
10232
10233 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10234 const0_rtx, style,
10235 flag_cf_protection);
10236 }
10237 else
10238 {
10239 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10240 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
10241 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
10242 ix86_add_queued_cfa_restore_notes (insn);
10243
10244 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10245 if (m->fs.cfa_offset != UNITS_PER_WORD)
10246 {
10247 m->fs.cfa_offset = UNITS_PER_WORD;
10248 add_reg_note (insn, REG_CFA_DEF_CFA,
10249 plus_constant (Pmode, stack_pointer_rtx,
10250 UNITS_PER_WORD));
10251 RTX_FRAME_RELATED_P (insn) = 1;
10252 }
10253 }
10254 m->fs.sp_offset = UNITS_PER_WORD;
10255 m->fs.sp_valid = true;
10256 m->fs.sp_realigned = false;
10257 }
10258 }
10259 else
10260 {
10261 /* SEH requires that the function end with (1) a stack adjustment
10262 if necessary, (2) a sequence of pops, and (3) a return or
10263 jump instruction. Prevent insns from the function body from
10264 being scheduled into this sequence. */
10265 if (TARGET_SEH)
10266 {
10267 /* Prevent a catch region from being adjacent to the standard
10268 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
10269 nor several other flags that would be interesting to test are
10270 set up yet. */
10271 if (flag_non_call_exceptions)
10272 emit_insn (gen_nops (const1_rtx));
10273 else
10274 emit_insn (gen_blockage ());
10275 }
10276
10277 /* First step is to deallocate the stack frame so that we can
10278 pop the registers. If the stack pointer was realigned, it needs
10279 to be restored now. Also do it on SEH target for very large
10280 frame as the emitted instructions aren't allowed by the ABI
10281 in epilogues. */
10282 if (!m->fs.sp_valid || m->fs.sp_realigned
10283 || (TARGET_SEH
10284 && (m->fs.sp_offset - reg_save_offset
10285 >= SEH_MAX_FRAME_SIZE)))
10286 {
10287 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10288 GEN_INT (m->fs.fp_offset
10289 - reg_save_offset),
10290 style, false);
10291 }
10292 else if (m->fs.sp_offset != reg_save_offset)
10293 {
10294 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10295 GEN_INT (m->fs.sp_offset
10296 - reg_save_offset),
10297 style,
10298 m->fs.cfa_reg == stack_pointer_rtx);
10299 }
10300
10301 if (TARGET_APX_PUSH2POP2
10302 && ix86_can_use_push2pop2 ()
10303 && m->func_type == TYPE_NORMAL)
10304 ix86_emit_restore_regs_using_pop2 ();
10305 else
10306 ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
10307 }
10308
10309 /* If we used a frame pointer and haven't already got rid of it,
10310 then do so now. */
10311 if (m->fs.fp_valid)
10312 {
10313 /* If the stack pointer is valid and pointing at the frame
10314 pointer store address, then we only need a pop. */
10315 if (sp_valid_at (frame.hfp_save_offset)
10316 && m->fs.sp_offset == frame.hfp_save_offset)
10317 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10318 /* Leave results in shorter dependency chains on CPUs that are
10319 able to grok it fast. */
10320 else if (TARGET_USE_LEAVE
10321 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
10322 || !cfun->machine->use_fast_prologue_epilogue)
10323 ix86_emit_leave (NULL);
10324 else
10325 {
10326 pro_epilogue_adjust_stack (stack_pointer_rtx,
10327 hard_frame_pointer_rtx,
10328 const0_rtx, style, !using_drap);
10329 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10330 }
10331 }
10332
10333 if (using_drap)
10334 {
10335 int param_ptr_offset = UNITS_PER_WORD;
10336 rtx_insn *insn;
10337
10338 gcc_assert (stack_realign_drap);
10339
10340 if (ix86_static_chain_on_stack)
10341 param_ptr_offset += UNITS_PER_WORD;
10342 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10343 param_ptr_offset += UNITS_PER_WORD;
10344
10345 insn = emit_insn (gen_rtx_SET
10346 (stack_pointer_rtx,
10347 plus_constant (Pmode, crtl->drap_reg,
10348 -param_ptr_offset)));
10349 m->fs.cfa_reg = stack_pointer_rtx;
10350 m->fs.cfa_offset = param_ptr_offset;
10351 m->fs.sp_offset = param_ptr_offset;
10352 m->fs.realigned = false;
10353
10354 add_reg_note (insn, REG_CFA_DEF_CFA,
10355 plus_constant (Pmode, stack_pointer_rtx,
10356 param_ptr_offset));
10357 RTX_FRAME_RELATED_P (insn) = 1;
10358
10359 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10360 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10361 }
10362
10363 /* At this point the stack pointer must be valid, and we must have
10364 restored all of the registers. We may not have deallocated the
10365 entire stack frame. We've delayed this until now because it may
10366 be possible to merge the local stack deallocation with the
10367 deallocation forced by ix86_static_chain_on_stack. */
10368 gcc_assert (m->fs.sp_valid);
10369 gcc_assert (!m->fs.sp_realigned);
10370 gcc_assert (!m->fs.fp_valid);
10371 gcc_assert (!m->fs.realigned);
10372 if (m->fs.sp_offset != UNITS_PER_WORD)
10373 {
10374 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10375 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10376 style, true);
10377 }
10378 else
10379 ix86_add_queued_cfa_restore_notes (get_last_insn ());
10380
10381 /* Sibcall epilogues don't want a return instruction. */
10382 if (style == 0)
10383 {
10384 m->fs = frame_state_save;
10385 return;
10386 }
10387
10388 if (cfun->machine->func_type != TYPE_NORMAL)
10389 emit_jump_insn (gen_interrupt_return ());
10390 else if (crtl->args.pops_args && crtl->args.size)
10391 {
10392 rtx popc = GEN_INT (crtl->args.pops_args);
10393
10394 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10395 address, do explicit add, and jump indirectly to the caller. */
10396
10397 if (crtl->args.pops_args >= 65536)
10398 {
10399 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10400 rtx_insn *insn;
10401
10402 /* There is no "pascal" calling convention in any 64bit ABI. */
10403 gcc_assert (!TARGET_64BIT);
10404
10405 insn = emit_insn (gen_pop (ecx));
10406 m->fs.cfa_offset -= UNITS_PER_WORD;
10407 m->fs.sp_offset -= UNITS_PER_WORD;
10408
10409 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10410 x = gen_rtx_SET (stack_pointer_rtx, x);
10411 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10412 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10413 RTX_FRAME_RELATED_P (insn) = 1;
10414
10415 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10416 popc, -1, true);
10417 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10418 }
10419 else
10420 emit_jump_insn (gen_simple_return_pop_internal (popc));
10421 }
10422 else if (!m->call_ms2sysv || !restore_stub_is_tail)
10423 {
10424 /* In case of return from EH a simple return cannot be used
10425 as a return address will be compared with a shadow stack
10426 return address. Use indirect jump instead. */
10427 if (style == 2 && flag_cf_protection)
10428 {
10429 /* Register used in indirect jump must be in word_mode. But
10430 Pmode may not be the same as word_mode for x32. */
10431 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
10432 rtx_insn *insn;
10433
10434 insn = emit_insn (gen_pop (ecx));
10435 m->fs.cfa_offset -= UNITS_PER_WORD;
10436 m->fs.sp_offset -= UNITS_PER_WORD;
10437
10438 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10439 x = gen_rtx_SET (stack_pointer_rtx, x);
10440 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10441 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10442 RTX_FRAME_RELATED_P (insn) = 1;
10443
10444 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10445 }
10446 else
10447 emit_jump_insn (gen_simple_return_internal ());
10448 }
10449
10450 /* Restore the state back to the state from the prologue,
10451 so that it's correct for the next epilogue. */
10452 m->fs = frame_state_save;
10453}
10454
10455/* Reset from the function's potential modifications. */
10456
10457static void
10458ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
10459{
10460 if (pic_offset_table_rtx
10461 && !ix86_use_pseudo_pic_reg ())
10462 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10463
10464 if (TARGET_MACHO)
10465 {
10466 rtx_insn *insn = get_last_insn ();
10467 rtx_insn *deleted_debug_label = NULL;
10468
10469 /* Mach-O doesn't support labels at the end of objects, so if
10470 it looks like we might want one, take special action.
10471 First, collect any sequence of deleted debug labels. */
10472 while (insn
10473 && NOTE_P (insn)
10474 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10475 {
10476 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
10477 notes only, instead set their CODE_LABEL_NUMBER to -1,
10478 otherwise there would be code generation differences
10479 in between -g and -g0. */
10480 if (NOTE_P (insn) && NOTE_KIND (insn)
10481 == NOTE_INSN_DELETED_DEBUG_LABEL)
10482 deleted_debug_label = insn;
10483 insn = PREV_INSN (insn);
10484 }
10485
10486 /* If we have:
10487 label:
10488 barrier
10489 then this needs to be detected, so skip past the barrier. */
10490
10491 if (insn && BARRIER_P (insn))
10492 insn = PREV_INSN (insn);
10493
10494 /* Up to now we've only seen notes or barriers. */
10495 if (insn)
10496 {
10497 if (LABEL_P (insn)
10498 || (NOTE_P (insn)
10499 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
10500 /* Trailing label. */
10501 fputs ("\tnop\n", file);
10502 else if (cfun && ! cfun->is_thunk)
10503 {
10504 /* See if we have a completely empty function body, skipping
10505 the special case of the picbase thunk emitted as asm. */
10506 while (insn && ! INSN_P (insn))
10507 insn = PREV_INSN (insn);
10508 /* If we don't find any insns, we've got an empty function body;
10509 I.e. completely empty - without a return or branch. This is
10510 taken as the case where a function body has been removed
10511 because it contains an inline __builtin_unreachable(). GCC
10512 declares that reaching __builtin_unreachable() means UB so
10513 we're not obliged to do anything special; however, we want
10514 non-zero-sized function bodies. To meet this, and help the
10515 user out, let's trap the case. */
10516 if (insn == NULL)
10517 fputs ("\tud2\n", file);
10518 }
10519 }
10520 else if (deleted_debug_label)
10521 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
10522 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
10523 CODE_LABEL_NUMBER (insn) = -1;
10524 }
10525}
10526
10527/* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
10528
10529void
10530ix86_print_patchable_function_entry (FILE *file,
10531 unsigned HOST_WIDE_INT patch_area_size,
10532 bool record_p)
10533{
10534 if (cfun->machine->function_label_emitted)
10535 {
10536 /* NB: When ix86_print_patchable_function_entry is called after
10537 function table has been emitted, we have inserted or queued
10538 a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
10539 place. There is nothing to do here. */
10540 return;
10541 }
10542
10543 default_print_patchable_function_entry (file, patch_area_size,
10544 record_p);
10545}
10546
10547/* Output patchable area. NB: default_print_patchable_function_entry
10548 isn't available in i386.md. */
10549
10550void
10551ix86_output_patchable_area (unsigned int patch_area_size,
10552 bool record_p)
10553{
10554 default_print_patchable_function_entry (asm_out_file,
10555 patch_area_size,
10556 record_p);
10557}
10558
10559/* Return a scratch register to use in the split stack prologue. The
10560 split stack prologue is used for -fsplit-stack. It is the first
10561 instructions in the function, even before the regular prologue.
10562 The scratch register can be any caller-saved register which is not
10563 used for parameters or for the static chain. */
10564
10565static unsigned int
10566split_stack_prologue_scratch_regno (void)
10567{
10568 if (TARGET_64BIT)
10569 return R11_REG;
10570 else
10571 {
10572 bool is_fastcall, is_thiscall;
10573 int regparm;
10574
10575 is_fastcall = (lookup_attribute ("fastcall",
10576 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10577 != NULL);
10578 is_thiscall = (lookup_attribute ("thiscall",
10579 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10580 != NULL);
10581 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10582
10583 if (is_fastcall)
10584 {
10585 if (DECL_STATIC_CHAIN (cfun->decl))
10586 {
10587 sorry ("%<-fsplit-stack%> does not support fastcall with "
10588 "nested function");
10589 return INVALID_REGNUM;
10590 }
10591 return AX_REG;
10592 }
10593 else if (is_thiscall)
10594 {
10595 if (!DECL_STATIC_CHAIN (cfun->decl))
10596 return DX_REG;
10597 return AX_REG;
10598 }
10599 else if (regparm < 3)
10600 {
10601 if (!DECL_STATIC_CHAIN (cfun->decl))
10602 return CX_REG;
10603 else
10604 {
10605 if (regparm >= 2)
10606 {
10607 sorry ("%<-fsplit-stack%> does not support 2 register "
10608 "parameters for a nested function");
10609 return INVALID_REGNUM;
10610 }
10611 return DX_REG;
10612 }
10613 }
10614 else
10615 {
10616 /* FIXME: We could make this work by pushing a register
10617 around the addition and comparison. */
10618 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
10619 return INVALID_REGNUM;
10620 }
10621 }
10622}
10623
10624/* A SYMBOL_REF for the function which allocates new stackspace for
10625 -fsplit-stack. */
10626
10627static GTY(()) rtx split_stack_fn;
10628
10629/* A SYMBOL_REF for the more stack function when using the large
10630 model. */
10631
10632static GTY(()) rtx split_stack_fn_large;
10633
10634/* Return location of the stack guard value in the TLS block. */
10635
10636rtx
10637ix86_split_stack_guard (void)
10638{
10639 int offset;
10640 addr_space_t as = DEFAULT_TLS_SEG_REG;
10641 rtx r;
10642
10643 gcc_assert (flag_split_stack);
10644
10645#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
10646 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
10647#else
10648 gcc_unreachable ();
10649#endif
10650
10651 r = GEN_INT (offset);
10652 r = gen_const_mem (Pmode, r);
10653 set_mem_addr_space (r, as);
10654
10655 return r;
10656}
10657
10658/* Handle -fsplit-stack. These are the first instructions in the
10659 function, even before the regular prologue. */
10660
10661void
10662ix86_expand_split_stack_prologue (void)
10663{
10664 HOST_WIDE_INT allocate;
10665 unsigned HOST_WIDE_INT args_size;
10666 rtx_code_label *label;
10667 rtx limit, current, allocate_rtx, call_fusage;
10668 rtx_insn *call_insn;
10669 unsigned int scratch_regno = INVALID_REGNUM;
10670 rtx scratch_reg = NULL_RTX;
10671 rtx_code_label *varargs_label = NULL;
10672 rtx fn;
10673
10674 gcc_assert (flag_split_stack && reload_completed);
10675
10676 ix86_finalize_stack_frame_flags ();
10677 struct ix86_frame &frame = cfun->machine->frame;
10678 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10679
10680 /* This is the label we will branch to if we have enough stack
10681 space. We expect the basic block reordering pass to reverse this
10682 branch if optimizing, so that we branch in the unlikely case. */
10683 label = gen_label_rtx ();
10684
10685 /* We need to compare the stack pointer minus the frame size with
10686 the stack boundary in the TCB. The stack boundary always gives
10687 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10688 can compare directly. Otherwise we need to do an addition. */
10689
10690 limit = ix86_split_stack_guard ();
10691
10692 if (allocate >= SPLIT_STACK_AVAILABLE
10693 || flag_force_indirect_call)
10694 {
10695 scratch_regno = split_stack_prologue_scratch_regno ();
10696 if (scratch_regno == INVALID_REGNUM)
10697 return;
10698 }
10699
10700 if (allocate >= SPLIT_STACK_AVAILABLE)
10701 {
10702 rtx offset;
10703
10704 /* We need a scratch register to hold the stack pointer minus
10705 the required frame size. Since this is the very start of the
10706 function, the scratch register can be any caller-saved
10707 register which is not used for parameters. */
10708 offset = GEN_INT (- allocate);
10709
10710 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10711 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10712 {
10713 /* We don't use gen_add in this case because it will
10714 want to split to lea, but when not optimizing the insn
10715 will not be split after this point. */
10716 emit_insn (gen_rtx_SET (scratch_reg,
10717 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10718 offset)));
10719 }
10720 else
10721 {
10722 emit_move_insn (scratch_reg, offset);
10723 emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
10724 }
10725 current = scratch_reg;
10726 }
10727 else
10728 current = stack_pointer_rtx;
10729
10730 ix86_expand_branch (GEU, current, limit, label);
10731 rtx_insn *jump_insn = get_last_insn ();
10732 JUMP_LABEL (jump_insn) = label;
10733
10734 /* Mark the jump as very likely to be taken. */
10735 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
10736
10737 if (split_stack_fn == NULL_RTX)
10738 {
10739 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10740 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
10741 }
10742 fn = split_stack_fn;
10743
10744 /* Get more stack space. We pass in the desired stack space and the
10745 size of the arguments to copy to the new stack. In 32-bit mode
10746 we push the parameters; __morestack will return on a new stack
10747 anyhow. In 64-bit mode we pass the parameters in r10 and
10748 r11. */
10749 allocate_rtx = GEN_INT (allocate);
10750 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
10751 call_fusage = NULL_RTX;
10752 rtx pop = NULL_RTX;
10753 if (TARGET_64BIT)
10754 {
10755 rtx reg10, reg11;
10756
10757 reg10 = gen_rtx_REG (DImode, R10_REG);
10758 reg11 = gen_rtx_REG (DImode, R11_REG);
10759
10760 /* If this function uses a static chain, it will be in %r10.
10761 Preserve it across the call to __morestack. */
10762 if (DECL_STATIC_CHAIN (cfun->decl))
10763 {
10764 rtx rax;
10765
10766 rax = gen_rtx_REG (word_mode, AX_REG);
10767 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
10768 use_reg (&call_fusage, rax);
10769 }
10770
10771 if (flag_force_indirect_call
10772 || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10773 {
10774 HOST_WIDE_INT argval;
10775
10776 if (split_stack_fn_large == NULL_RTX)
10777 {
10778 split_stack_fn_large
10779 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10780 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
10781 }
10782
10783 fn = split_stack_fn_large;
10784
10785 if (ix86_cmodel == CM_LARGE_PIC)
10786 {
10787 rtx_code_label *label;
10788 rtx x;
10789
10790 gcc_assert (Pmode == DImode);
10791
10792 label = gen_label_rtx ();
10793 emit_label (label);
10794 LABEL_PRESERVE_P (label) = 1;
10795 emit_insn (gen_set_rip_rex64 (reg10, label));
10796 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10797 emit_insn (gen_add2_insn (reg10, reg11));
10798 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT);
10799 x = gen_rtx_CONST (Pmode, x);
10800 emit_move_insn (reg11, x);
10801 x = gen_rtx_PLUS (Pmode, reg10, reg11);
10802 x = gen_const_mem (Pmode, x);
10803 fn = copy_to_suggested_reg (x, reg11, Pmode);
10804 }
10805 else if (ix86_cmodel == CM_LARGE)
10806 fn = copy_to_suggested_reg (fn, reg11, Pmode);
10807
10808 /* When using the large model we need to load the address
10809 into a register, and we've run out of registers. So we
10810 switch to a different calling convention, and we call a
10811 different function: __morestack_large_model.  We pass the
10812 argument size in the upper 32 bits of r10 and pass the
10813 frame size in the lower 32 bits. */
10814 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
10815 gcc_assert ((args_size & 0xffffffff) == args_size);
10816
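 /* Note: the 32-bit shift below is written as two shifts by 16;
    presumably this is a guard against shift-count warnings on hosts
    where HOST_WIDE_INT could be narrower than 64 bits.  */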
10817 argval = ((args_size << 16) << 16) + allocate;
10818 emit_move_insn (reg10, GEN_INT (argval));
10819 }
10820 else
10821 {
10822 emit_move_insn (reg10, allocate_rtx);
10823 emit_move_insn (reg11, GEN_INT (args_size));
10824 use_reg (&call_fusage, reg11);
10825 }
10826
10827 use_reg (&call_fusage, reg10);
10828 }
10829 else
10830 {
10831 if (flag_force_indirect_call && flag_pic)
10832 {
10833 rtx x;
10834
10835 gcc_assert (Pmode == SImode);
10836
10837 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10838
10839 emit_insn (gen_set_got (scratch_reg));
10840 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
10841 UNSPEC_GOT);
10842 x = gen_rtx_CONST (Pmode, x);
10843 x = gen_rtx_PLUS (Pmode, scratch_reg, x);
10844 x = gen_const_mem (Pmode, x);
10845 fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
10846 }
10847
10848 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
10849 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
10850 insn = emit_insn (gen_push (allocate_rtx));
10851 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
10852 pop = GEN_INT (2 * UNITS_PER_WORD);
10853 }
10854
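 /* With -mforce-indirect-call, load the callee address (__morestack or
    its large-model variant) into the scratch register if it is not
    already in a register, so the call below can be emitted as an
    indirect call through a register.  */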
10855 if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
10856 {
10857 scratch_reg = gen_rtx_REG (word_mode, scratch_regno);
10858
10859 if (GET_MODE (fn) != word_mode)
10860 fn = gen_rtx_ZERO_EXTEND (word_mode, fn);
10861
10862 fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
10863 }
10864
10865 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
10866 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10867 pop, false);
10868 add_function_usage_to (call_insn, call_fusage);
10869 if (!TARGET_64BIT)
10870 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
10871 /* Indicate that this function can't jump to non-local gotos. */
10872 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
10873
10874 /* In order to make call/return prediction work right, we now need
10875 to execute a return instruction. See
10876 libgcc/config/i386/morestack.S for the details on how this works.
10877
10878 For flow purposes gcc must not see this as a return
10879 instruction--we need control flow to continue at the subsequent
10880 label. Therefore, we use an unspec. */
10881 gcc_assert (crtl->args.pops_args < 65536);
10882 rtx_insn *ret_insn
10883 = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10884
10885 if ((flag_cf_protection & CF_BRANCH))
10886 {
10887 /* Insert ENDBR since __morestack will jump back here via indirect
10888 call. */
10889 rtx cet_eb = gen_nop_endbr ();
10890 emit_insn_after (cet_eb, ret_insn);
10891 }
10892
10893 /* If we are in 64-bit mode and this function uses a static chain,
10894 we saved %r10 in %rax before calling __morestack. */
10895 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10896 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
10897 gen_rtx_REG (word_mode, AX_REG));
10898
10899 /* If this function calls va_start, we need to store a pointer to
10900 the arguments on the old stack, because they may not have been
10901 all copied to the new stack. At this point the old stack can be
10902 found at the frame pointer value used by __morestack, because
10903 __morestack has set that up before calling back to us. Here we
10904 store that pointer in a scratch register, and in
10905 ix86_expand_prologue we store the scratch register in a stack
10906 slot. */
10907 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10908 {
10909 rtx frame_reg;
10910 int words;
10911
10912 scratch_regno = split_stack_prologue_scratch_regno ();
10913 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10914 frame_reg = gen_rtx_REG (Pmode, BP_REG);
10915
10916 /* 64-bit:
10917 fp -> old fp value
10918 return address within this function
10919 return address of caller of this function
10920 stack arguments
10921 So we add three words to get to the stack arguments.
10922
10923 32-bit:
10924 fp -> old fp value
10925 return address within this function
10926 first argument to __morestack
10927 second argument to __morestack
10928 return address of caller of this function
10929 stack arguments
10930 So we add five words to get to the stack arguments.
10931 */
10932 words = TARGET_64BIT ? 3 : 5;
10933 emit_insn (gen_rtx_SET (scratch_reg,
10934 plus_constant (Pmode, frame_reg,
10935 words * UNITS_PER_WORD)));
10936
10937 varargs_label = gen_label_rtx ();
10938 emit_jump_insn (gen_jump (varargs_label));
10939 JUMP_LABEL (get_last_insn ()) = varargs_label;
10940
10941 emit_barrier ();
10942 }
10943
10944 emit_label (label);
10945 LABEL_NUSES (label) = 1;
10946
10947 /* If this function calls va_start, we now have to set the scratch
10948 register for the case where we do not call __morestack. In this
10949 case we need to set it based on the stack pointer. */
10950 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10951 {
10952 emit_insn (gen_rtx_SET (scratch_reg,
10953 plus_constant (Pmode, stack_pointer_rtx,
10954 UNITS_PER_WORD)));
10955
10956 emit_label (varargs_label);
10957 LABEL_NUSES (varargs_label) = 1;
10958 }
10959}
10960
10961/* We may have to tell the dataflow pass that the split stack prologue
10962 is initializing a scratch register. */
10963
10964static void
10965ix86_live_on_entry (bitmap regs)
10966{
10967 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10968 {
10969 gcc_assert (flag_split_stack);
10970 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10971 }
10972}
10973
10974/* Extract the parts of an RTL expression that is a valid memory address
10975 for an instruction. Return false if the structure of the address is
10976 grossly off. */
10977
10978bool
10979ix86_decompose_address (rtx addr, struct ix86_address *out)
10980{
10981 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10982 rtx base_reg, index_reg;
10983 HOST_WIDE_INT scale = 1;
10984 rtx scale_rtx = NULL_RTX;
10985 rtx tmp;
10986 addr_space_t seg = ADDR_SPACE_GENERIC;
10987
10988 /* Allow zero-extended SImode addresses,
10989 they will be emitted with addr32 prefix. */
10990 if (TARGET_64BIT && GET_MODE (addr) == DImode)
10991 {
10992 if (GET_CODE (addr) == ZERO_EXTEND
10993 && GET_MODE (XEXP (addr, 0)) == SImode)
10994 {
10995 addr = XEXP (addr, 0);
10996 if (CONST_INT_P (addr))
10997 return false;
10998 }
10999 else if (GET_CODE (addr) == AND)
11000 {
11001 rtx mask = XEXP (addr, 1);
11002 rtx shift_val;
11003
11004 if (const_32bit_mask (mask, DImode)
11005 /* For an ASHIFT inside an AND, combine will not generate a
11006 canonical zero-extend.  Merge the AND mask with the shift count
11007 to check whether it is a canonical zero-extend.  */
11008 || (CONST_INT_P (mask)
11009 && GET_CODE (XEXP (addr, 0)) == ASHIFT
11010 && CONST_INT_P (shift_val = XEXP (XEXP (addr, 0), 1))
11011 && ((UINTVAL (mask)
11012 | ((HOST_WIDE_INT_1U << INTVAL (shift_val)) - 1))
11013 == HOST_WIDE_INT_UC (0xffffffff))))
11014 {
11015 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
11016 if (addr == NULL_RTX)
11017 return false;
11018
11019 if (CONST_INT_P (addr))
11020 return false;
11021 }
11022 }
11023 }
11024
11025 /* Allow SImode subregs of DImode addresses,
11026 they will be emitted with addr32 prefix. */
11027 if (TARGET_64BIT && GET_MODE (addr) == SImode)
11028 {
11029 if (SUBREG_P (addr)
11030 && GET_MODE (SUBREG_REG (addr)) == DImode)
11031 {
11032 addr = SUBREG_REG (addr);
11033 if (CONST_INT_P (addr))
11034 return false;
11035 }
11036 }
11037
11038 if (REG_P (addr))
11039 base = addr;
11040 else if (SUBREG_P (addr))
11041 {
11042 if (REG_P (SUBREG_REG (addr)))
11043 base = addr;
11044 else
11045 return false;
11046 }
11047 else if (GET_CODE (addr) == PLUS)
11048 {
11049 rtx addends[4], op;
11050 int n = 0, i;
11051
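 /* Flatten the nested PLUS chain into at most four addends: the loop
    below collects each right-hand operand working from the outermost
    PLUS inwards and stores the final non-PLUS left-hand operand last,
    so the classification loop that follows visits the addends from
    the innermost one outwards.  */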
11052 op = addr;
11053 do
11054 {
11055 if (n >= 4)
11056 return false;
11057 addends[n++] = XEXP (op, 1);
11058 op = XEXP (op, 0);
11059 }
11060 while (GET_CODE (op) == PLUS);
11061 if (n >= 4)
11062 return false;
11063 addends[n] = op;
11064
11065 for (i = n; i >= 0; --i)
11066 {
11067 op = addends[i];
11068 switch (GET_CODE (op))
11069 {
11070 case MULT:
11071 if (index)
11072 return false;
11073 index = XEXP (op, 0);
11074 scale_rtx = XEXP (op, 1);
11075 break;
11076
11077 case ASHIFT:
11078 if (index)
11079 return false;
11080 index = XEXP (op, 0);
11081 tmp = XEXP (op, 1);
11082 if (!CONST_INT_P (tmp))
11083 return false;
11084 scale = INTVAL (tmp);
11085 if ((unsigned HOST_WIDE_INT) scale > 3)
11086 return false;
11087 scale = 1 << scale;
11088 break;
11089
11090 case ZERO_EXTEND:
11091 op = XEXP (op, 0);
11092 if (GET_CODE (op) != UNSPEC)
11093 return false;
11094 /* FALLTHRU */
11095
11096 case UNSPEC:
11097 if (XINT (op, 1) == UNSPEC_TP
11098 && TARGET_TLS_DIRECT_SEG_REFS
11099 && seg == ADDR_SPACE_GENERIC)
11100 seg = DEFAULT_TLS_SEG_REG;
11101 else
11102 return false;
11103 break;
11104
11105 case SUBREG:
11106 if (!REG_P (SUBREG_REG (op)))
11107 return false;
11108 /* FALLTHRU */
11109
11110 case REG:
11111 if (!base)
11112 base = op;
11113 else if (!index)
11114 index = op;
11115 else
11116 return false;
11117 break;
11118
11119 case CONST:
11120 case CONST_INT:
11121 case SYMBOL_REF:
11122 case LABEL_REF:
11123 if (disp)
11124 return false;
11125 disp = op;
11126 break;
11127
11128 default:
11129 return false;
11130 }
11131 }
11132 }
11133 else if (GET_CODE (addr) == MULT)
11134 {
11135 index = XEXP (addr, 0); /* index*scale */
11136 scale_rtx = XEXP (addr, 1);
11137 }
11138 else if (GET_CODE (addr) == ASHIFT)
11139 {
11140 /* We're called for lea too, which implements ashift on occasion. */
11141 index = XEXP (addr, 0);
11142 tmp = XEXP (addr, 1);
11143 if (!CONST_INT_P (tmp))
11144 return false;
11145 scale = INTVAL (tmp);
11146 if ((unsigned HOST_WIDE_INT) scale > 3)
11147 return false;
11148 scale = 1 << scale;
11149 }
11150 else
11151 disp = addr; /* displacement */
11152
11153 if (index)
11154 {
11155 if (REG_P (index))
11156 ;
11157 else if (SUBREG_P (index)
11158 && REG_P (SUBREG_REG (index)))
11159 ;
11160 else
11161 return false;
11162 }
11163
11164 /* Extract the integral value of scale. */
11165 if (scale_rtx)
11166 {
11167 if (!CONST_INT_P (scale_rtx))
11168 return false;
11169 scale = INTVAL (scale_rtx);
11170 }
11171
11172 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
11173 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
11174
11175 /* Avoid useless 0 displacement. */
11176 if (disp == const0_rtx && (base || index))
11177 disp = NULL_RTX;
11178
11179 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11180 if (base_reg && index_reg && scale == 1
11181 && (REGNO (index_reg) == ARG_POINTER_REGNUM
11182 || REGNO (index_reg) == FRAME_POINTER_REGNUM
11183 || REGNO (index_reg) == SP_REG))
11184 {
11185 std::swap (base, index);
11186 std::swap (base_reg, index_reg);
11187 }
11188
11189 /* Special case: %ebp cannot be encoded as a base without a displacement.
11190 Similarly %r13. */
11191 if (!disp && base_reg
11192 && (REGNO (base_reg) == ARG_POINTER_REGNUM
11193 || REGNO (base_reg) == FRAME_POINTER_REGNUM
11194 || REGNO (base_reg) == BP_REG
11195 || REGNO (base_reg) == R13_REG))
11196 disp = const0_rtx;
11197
11198 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11199 Avoid this by transforming to [%esi+0].
11200 Reload calls address legitimization without cfun defined, so we need
11201 to test cfun for being non-NULL. */
11202 if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
11203 && base_reg && !index_reg && !disp
11204 && REGNO (base_reg) == SI_REG)
11205 disp = const0_rtx;
11206
11207 /* Special case: encode reg+reg instead of reg*2. */
11208 if (!base && index && scale == 2)
11209 base = index, base_reg = index_reg, scale = 1;
11210
11211 /* Special case: scaling cannot be encoded without base or displacement. */
11212 if (!base && !disp && index && scale != 1)
11213 disp = const0_rtx;
11214
11215 out->base = base;
11216 out->index = index;
11217 out->disp = disp;
11218 out->scale = scale;
11219 out->seg = seg;
11220
11221 return true;
11222}
11223
11224/* Return cost of the memory address x.
11225 For i386, it is better to use a complex address than let gcc copy
11226 the address into a reg and make a new pseudo. But not if the address
11227 requires two regs - that would mean more pseudos with longer
11228 lifetimes. */
11229static int
11230ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
11231{
11232 struct ix86_address parts;
11233 int cost = 1;
11234 int ok = ix86_decompose_address (x, &parts);
11235
11236 gcc_assert (ok);
11237
11238 if (parts.base && SUBREG_P (parts.base))
11239 parts.base = SUBREG_REG (parts.base);
11240 if (parts.index && SUBREG_P (parts.index))
11241 parts.index = SUBREG_REG (parts.index);
11242
11243 /* Attempt to minimize number of registers in the address by increasing
11244 address cost for each used register. We don't increase address cost
11245 for "pic_offset_table_rtx".  When a memory operand using "pic_offset_table_rtx"
11246 is not invariant itself, it most likely means that base or index is not
11247 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
11248 which is not profitable for x86. */
11249 if (parts.base
11250 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11251 && (current_pass->type == GIMPLE_PASS
11252 || !pic_offset_table_rtx
11253 || !REG_P (parts.base)
11254 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
11255 cost++;
11256
11257 if (parts.index
11258 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11259 && (current_pass->type == GIMPLE_PASS
11260 || !pic_offset_table_rtx
11261 || !REG_P (parts.index)
11262 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
11263 cost++;
11264
11265 /* AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
11266 since its predecode logic can't detect the length of such instructions
11267 and decoding degenerates to the vector decoder.  Increase the cost of
11268 such addresses here.  The penalty is at least 2 cycles.  It may be
11269 worthwhile to split such addresses or even refuse them altogether.
11270
11271 The following addressing modes are affected:
11272 [base+scale*index]
11273 [scale*index+disp]
11274 [base+index]
11275
11276 The first and last cases may be avoidable by explicitly coding the zero
11277 into the memory address, but I don't have an AMD-K6 machine handy to
11278 check this theory. */
11279
11280 if (TARGET_CPU_P (K6)
11281 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11282 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11283 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11284 cost += 10;
11285
11286 return cost;
11287}
11288
11289/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11290 this is used to form addresses to local data when -fPIC is in
11291 use. */
11292
11293static bool
11294darwin_local_data_pic (rtx disp)
11295{
11296 return (GET_CODE (disp) == UNSPEC
11297 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11298}
11299
11300/* True if the function symbol operand X should be loaded from GOT.
11301 If CALL_P is true, X is a call operand.
11302
11303 NB: -mno-direct-extern-access doesn't force load from GOT for
11304 call.
11305
11306 NB: In 32-bit mode, only non-PIC is allowed in inline assembly
11307 statements, since a PIC register could not be available at the
11308 call site. */
11309
11310bool
11311ix86_force_load_from_GOT_p (rtx x, bool call_p)
11312{
11313 return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
11314 && !TARGET_PECOFF && !TARGET_MACHO
11315 && (!flag_pic || this_is_asm_operands)
11316 && ix86_cmodel != CM_LARGE
11317 && ix86_cmodel != CM_LARGE_PIC
11318 && GET_CODE (x) == SYMBOL_REF
11319 && ((!call_p
11320 && (!ix86_direct_extern_access
11321 || (SYMBOL_REF_DECL (x)
11322 && lookup_attribute ("nodirect_extern_access",
11323 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
11324 || (SYMBOL_REF_FUNCTION_P (x)
11325 && (!flag_plt
11326 || (SYMBOL_REF_DECL (x)
11327 && lookup_attribute ("noplt",
11328 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
11329 && !SYMBOL_REF_LOCAL_P (x));
11330}
11331
11332/* Determine if a given RTX is a valid constant. We already know this
11333 satisfies CONSTANT_P. */
11334
11335static bool
11336ix86_legitimate_constant_p (machine_mode mode, rtx x)
11337{
11338 switch (GET_CODE (x))
11339 {
11340 case CONST:
11341 x = XEXP (x, 0);
11342
11343 if (GET_CODE (x) == PLUS)
11344 {
11345 if (!CONST_INT_P (XEXP (x, 1)))
11346 return false;
11347 x = XEXP (x, 0);
11348 }
11349
11350 if (TARGET_MACHO && darwin_local_data_pic (x))
11351 return true;
11352
11353 /* Only some unspecs are valid as "constants". */
11354 if (GET_CODE (x) == UNSPEC)
11355 switch (XINT (x, 1))
11356 {
11357 case UNSPEC_GOT:
11358 case UNSPEC_GOTOFF:
11359 case UNSPEC_PLTOFF:
11360 return TARGET_64BIT;
11361 case UNSPEC_TPOFF:
11362 case UNSPEC_NTPOFF:
11363 x = XVECEXP (x, 0, 0);
11364 return (GET_CODE (x) == SYMBOL_REF
11365 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11366 case UNSPEC_DTPOFF:
11367 x = XVECEXP (x, 0, 0);
11368 return (GET_CODE (x) == SYMBOL_REF
11369 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11370 case UNSPEC_SECREL32:
11371 x = XVECEXP (x, 0, 0);
11372 return GET_CODE (x) == SYMBOL_REF;
11373 default:
11374 return false;
11375 }
11376
11377 /* We must have drilled down to a symbol. */
11378 if (GET_CODE (x) == LABEL_REF)
11379 return true;
11380 if (GET_CODE (x) != SYMBOL_REF)
11381 return false;
11382 /* FALLTHRU */
11383
11384 case SYMBOL_REF:
11385 /* TLS symbols are never valid. */
11386 if (SYMBOL_REF_TLS_MODEL (x))
11387 return false;
11388
11389 /* DLLIMPORT symbols are never valid. */
11390 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11391 && SYMBOL_REF_DLLIMPORT_P (x))
11392 return false;
11393
11394#if TARGET_MACHO
11395 /* mdynamic-no-pic */
11396 if (MACHO_DYNAMIC_NO_PIC_P)
11397 return machopic_symbol_defined_p (x);
11398#endif
11399
11400 /* External function address should be loaded
11401 via the GOT slot to avoid PLT. */
11402 if (ix86_force_load_from_GOT_p (x))
11403 return false;
11404
11405 break;
11406
11407 CASE_CONST_SCALAR_INT:
11408 if (ix86_endbr_immediate_operand (x, VOIDmode))
11409 return false;
11410
11411 switch (mode)
11412 {
11413 case E_TImode:
11414 if (TARGET_64BIT)
11415 return true;
11416 /* FALLTHRU */
11417 case E_OImode:
11418 case E_XImode:
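 /* Reject integer constants wider than the widest mode we can actually
    move: XImode with AVX512F, OImode with AVX, TImode with SSE2, and
    DImode otherwise, unless the value is a standard SSE constant.  */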
11419 if (!standard_sse_constant_p (x, mode)
11420 && GET_MODE_SIZE (TARGET_AVX512F
11421 ? XImode
11422 : (TARGET_AVX
11423 ? OImode
11424 : (TARGET_SSE2
11425 ? TImode : DImode))) < GET_MODE_SIZE (mode))
11426 return false;
11427 default:
11428 break;
11429 }
11430 break;
11431
11432 case CONST_VECTOR:
11433 if (!standard_sse_constant_p (x, mode))
11434 return false;
11435 break;
11436
11437 case CONST_DOUBLE:
11438 if (mode == E_BFmode)
11439 return false;
11440
11441 default:
11442 break;
11443 }
11444
11445 /* Otherwise we handle everything else in the move patterns. */
11446 return true;
11447}
11448
11449/* Determine if it's legal to put X into the constant pool. This
11450 is not possible for the address of thread-local symbols, which
11451 is checked above. */
11452
11453static bool
11454ix86_cannot_force_const_mem (machine_mode mode, rtx x)
11455{
11456 /* We can put any immediate constant in memory. */
11457 switch (GET_CODE (x))
11458 {
11459 CASE_CONST_ANY:
11460 return false;
11461
11462 default:
11463 break;
11464 }
11465
11466 return !ix86_legitimate_constant_p (mode, x);
11467}
11468
11469/* Return a unique alias set for the GOT. */
11470
11471alias_set_type
11472ix86_GOT_alias_set (void)
11473{
11474 static alias_set_type set = -1;
11475 if (set == -1)
11476 set = new_alias_set ();
11477 return set;
11478}
11479
11480/* Nonzero if the constant value X is a legitimate general operand
11481 when generating PIC code. It is given that flag_pic is on and
11482 that X satisfies CONSTANT_P. */
11483
11484bool
11485legitimate_pic_operand_p (rtx x)
11486{
11487 rtx inner;
11488
11489 switch (GET_CODE (x))
11490 {
11491 case CONST:
11492 inner = XEXP (x, 0);
11493 if (GET_CODE (inner) == PLUS
11494 && CONST_INT_P (XEXP (inner, 1)))
11495 inner = XEXP (inner, 0);
11496
11497 /* Only some unspecs are valid as "constants". */
11498 if (GET_CODE (inner) == UNSPEC)
11499 switch (XINT (inner, 1))
11500 {
11501 case UNSPEC_GOT:
11502 case UNSPEC_GOTOFF:
11503 case UNSPEC_PLTOFF:
11504 return TARGET_64BIT;
11505 case UNSPEC_TPOFF:
11506 x = XVECEXP (inner, 0, 0);
11507 return (GET_CODE (x) == SYMBOL_REF
11508 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11509 case UNSPEC_SECREL32:
11510 x = XVECEXP (inner, 0, 0);
11511 return GET_CODE (x) == SYMBOL_REF;
11512 case UNSPEC_MACHOPIC_OFFSET:
11513 return legitimate_pic_address_disp_p (x);
11514 default:
11515 return false;
11516 }
11517 /* FALLTHRU */
11518
11519 case SYMBOL_REF:
11520 case LABEL_REF:
11521 return legitimate_pic_address_disp_p (x);
11522
11523 default:
11524 return true;
11525 }
11526}
11527
11528/* Determine if a given CONST RTX is a valid memory displacement
11529 in PIC mode. */
11530
11531bool
11532legitimate_pic_address_disp_p (rtx disp)
11533{
11534 bool saw_plus;
11535
11536 /* In 64bit mode we can allow direct addresses of symbols and labels
11537 when they are not dynamic symbols. */
11538 if (TARGET_64BIT)
11539 {
11540 rtx op0 = disp, op1;
11541
11542 switch (GET_CODE (disp))
11543 {
11544 case LABEL_REF:
11545 return true;
11546
11547 case CONST:
11548 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11549 break;
11550 op0 = XEXP (XEXP (disp, 0), 0);
11551 op1 = XEXP (XEXP (disp, 0), 1);
11552 if (!CONST_INT_P (op1))
11553 break;
11554 if (GET_CODE (op0) == UNSPEC
11555 && (XINT (op0, 1) == UNSPEC_DTPOFF
11556 || XINT (op0, 1) == UNSPEC_NTPOFF)
11557 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
11558 return true;
11559 if (INTVAL (op1) >= 16*1024*1024
11560 || INTVAL (op1) < -16*1024*1024)
11561 break;
11562 if (GET_CODE (op0) == LABEL_REF)
11563 return true;
11564 if (GET_CODE (op0) == CONST
11565 && GET_CODE (XEXP (op0, 0)) == UNSPEC
11566 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
11567 return true;
11568 if (GET_CODE (op0) == UNSPEC
11569 && XINT (op0, 1) == UNSPEC_PCREL)
11570 return true;
11571 if (GET_CODE (op0) != SYMBOL_REF)
11572 break;
11573 /* FALLTHRU */
11574
11575 case SYMBOL_REF:
11576 /* TLS references should always be enclosed in UNSPEC.
11577 A dllimported symbol always needs to be resolved. */
11578 if (SYMBOL_REF_TLS_MODEL (op0)
11579 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
11580 return false;
11581
11582 if (TARGET_PECOFF)
11583 {
11584#if TARGET_PECOFF
11585 if (is_imported_p (op0))
11586 return true;
11587#endif
11588
11589 if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
11590 break;
11591
11592 /* Non-external-weak function symbols need to be resolved only
11593 for the large model. Non-external symbols don't need to be
11594 resolved for large and medium models. For the small model,
11595 we don't need to resolve anything here. */
11596 if ((ix86_cmodel != CM_LARGE_PIC
11597 && SYMBOL_REF_FUNCTION_P (op0)
11598 && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
11599 || !SYMBOL_REF_EXTERNAL_P (op0)
11600 || ix86_cmodel == CM_SMALL_PIC)
11601 return true;
11602 }
11603 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
11604 && (SYMBOL_REF_LOCAL_P (op0)
11605 || ((ix86_direct_extern_access
11606 && !(SYMBOL_REF_DECL (op0)
11607 && lookup_attribute ("nodirect_extern_access",
11608 DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
11609 && HAVE_LD_PIE_COPYRELOC
11610 && flag_pie
11611 && !SYMBOL_REF_WEAK (op0)
11612 && !SYMBOL_REF_FUNCTION_P (op0)))
11613 && ix86_cmodel != CM_LARGE_PIC)
11614 return true;
11615 break;
11616
11617 default:
11618 break;
11619 }
11620 }
11621 if (GET_CODE (disp) != CONST)
11622 return false;
11623 disp = XEXP (disp, 0);
11624
11625 if (TARGET_64BIT)
11626 {
11627 /* It is unsafe to allow PLUS expressions here; this limits the allowed
11628 distance of GOT table references.  We should not need these anyway. */
11629 if (GET_CODE (disp) != UNSPEC
11630 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11631 && XINT (disp, 1) != UNSPEC_GOTOFF
11632 && XINT (disp, 1) != UNSPEC_PCREL
11633 && XINT (disp, 1) != UNSPEC_PLTOFF))
11634 return false;
11635
11636 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11637 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11638 return false;
11639 return true;
11640 }
11641
11642 saw_plus = false;
11643 if (GET_CODE (disp) == PLUS)
11644 {
11645 if (!CONST_INT_P (XEXP (disp, 1)))
11646 return false;
11647 disp = XEXP (disp, 0);
11648 saw_plus = true;
11649 }
11650
11651 if (TARGET_MACHO && darwin_local_data_pic (disp))
11652 return true;
11653
11654 if (GET_CODE (disp) != UNSPEC)
11655 return false;
11656
11657 switch (XINT (disp, 1))
11658 {
11659 case UNSPEC_GOT:
11660 if (saw_plus)
11661 return false;
11662 /* We need to check for both symbols and labels because VxWorks loads
11663 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11664 details. */
11665 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11666 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11667 case UNSPEC_GOTOFF:
11668 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11669 While the ABI also specifies a 32bit relocation, we don't produce it
11670 in the small PIC model at all.  */
11671 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11672 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11673 && !TARGET_64BIT)
11674 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11675 return false;
11676 case UNSPEC_GOTTPOFF:
11677 case UNSPEC_GOTNTPOFF:
11678 case UNSPEC_INDNTPOFF:
11679 if (saw_plus)
11680 return false;
11681 disp = XVECEXP (disp, 0, 0);
11682 return (GET_CODE (disp) == SYMBOL_REF
11683 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11684 case UNSPEC_NTPOFF:
11685 disp = XVECEXP (disp, 0, 0);
11686 return (GET_CODE (disp) == SYMBOL_REF
11687 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11688 case UNSPEC_DTPOFF:
11689 disp = XVECEXP (disp, 0, 0);
11690 return (GET_CODE (disp) == SYMBOL_REF
11691 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11692 case UNSPEC_SECREL32:
11693 disp = XVECEXP (disp, 0, 0);
11694 return GET_CODE (disp) == SYMBOL_REF;
11695 }
11696
11697 return false;
11698}
11699
11700/* Determine if OP is a suitable RTX for an address register.
11701 Return the naked register if a register or a register subreg is
11702 found, otherwise return NULL_RTX. */
11703
11704static rtx
11705ix86_validate_address_register (rtx op)
11706{
11707 machine_mode mode = GET_MODE (op);
11708
11709 /* Only SImode or DImode registers can form the address. */
11710 if (mode != SImode && mode != DImode)
11711 return NULL_RTX;
11712
11713 if (REG_P (op))
11714 return op;
11715 else if (SUBREG_P (op))
11716 {
11717 rtx reg = SUBREG_REG (op);
11718
11719 if (!REG_P (reg))
11720 return NULL_RTX;
11721
11722 mode = GET_MODE (reg);
11723
11724 /* Don't allow SUBREGs that span more than a word. It can
11725 lead to spill failures when the register is one word out
11726 of a two word structure. */
11727 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11728 return NULL_RTX;
11729
11730 /* Allow only SUBREGs of non-eliminable hard registers. */
11731 if (register_no_elim_operand (reg, mode))
11732 return reg;
11733 }
11734
11735 /* Op is not a register. */
11736 return NULL_RTX;
11737}
11738
11739/* Determine which memory address register set insn can use. */
11740
11741static enum attr_addr
11742ix86_memory_address_reg_class (rtx_insn* insn)
11743{
11744 /* LRA can do some initialization with NULL insn,
11745 return maximum register class in this case. */
11746 enum attr_addr addr_rclass = ADDR_GPR32;
11747
11748 if (!insn)
11749 return addr_rclass;
11750
11751 if (asm_noperands (PATTERN (insn)) >= 0
11752 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
11753 return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;
11754
11755 /* Return maximum register class for unrecognized instructions. */
11756 if (INSN_CODE (insn) < 0)
11757 return addr_rclass;
11758
11759 /* Try to recognize the insn before calling get_attr_addr.
11760 Save current recog_data and current alternative. */
11761 struct recog_data_d saved_recog_data = recog_data;
11762 int saved_alternative = which_alternative;
11763
11764 /* Update recog_data for processing of alternatives. */
11765 extract_insn_cached (insn);
11766
11767 /* If the current alternative is not set, loop through the enabled
11768 alternatives and get the most limited register class. */
11769 if (saved_alternative == -1)
11770 {
11771 alternative_mask enabled = get_enabled_alternatives (insn);
11772
11773 for (int i = 0; i < recog_data.n_alternatives; i++)
11774 {
11775 if (!TEST_BIT (enabled, i))
11776 continue;
11777
11778 which_alternative = i;
11779 addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
11780 }
11781 }
11782 else
11783 {
11784 which_alternative = saved_alternative;
11785 addr_rclass = get_attr_addr (insn);
11786 }
11787
11788 recog_data = saved_recog_data;
11789 which_alternative = saved_alternative;
11790
11791 return addr_rclass;
11792}
11793
11794/* Return the base register class that INSN can use for memory addresses. */
11795
11796enum reg_class
11797ix86_insn_base_reg_class (rtx_insn* insn)
11798{
11799 switch (ix86_memory_address_reg_class (insn))
11800 {
11801 case ADDR_GPR8:
11802 return LEGACY_GENERAL_REGS;
11803 case ADDR_GPR16:
11804 return GENERAL_GPR16;
11805 case ADDR_GPR32:
11806 break;
11807 default:
11808 gcc_unreachable ();
11809 }
11810
11811 return BASE_REG_CLASS;
11812}
11813
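/* Return true if hard register REGNO is valid as the base register of a
   memory address in INSN.  */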
11814bool
11815ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn)
11816{
11817 switch (ix86_memory_address_reg_class (insn))
11818 {
11819 case ADDR_GPR8:
11820 return LEGACY_INT_REGNO_P (regno);
11821 case ADDR_GPR16:
11822 return GENERAL_GPR16_REGNO_P (regno);
11823 case ADDR_GPR32:
11824 break;
11825 default:
11826 gcc_unreachable ();
11827 }
11828
11829 return GENERAL_REGNO_P (regno);
11830}
11831
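/* Return the index register class that INSN can use for memory
   addresses.  */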
11832enum reg_class
11833ix86_insn_index_reg_class (rtx_insn* insn)
11834{
11835 switch (ix86_memory_address_reg_class (insn))
11836 {
11837 case ADDR_GPR8:
11838 return LEGACY_INDEX_REGS;
11839 case ADDR_GPR16:
11840 return INDEX_GPR16;
11841 case ADDR_GPR32:
11842 break;
11843 default:
11844 gcc_unreachable ();
11845 }
11846
11847 return INDEX_REG_CLASS;
11848}
11849
11850/* Recognizes RTL expressions that are valid memory addresses for an
11851 instruction. The MODE argument is the machine mode for the MEM
11852 expression that wants to use this address.
11853
11854 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
11855 convert common non-canonical forms to canonical form so that they will
11856 be recognized. */
11857
11858static bool
11859ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
11860 code_helper = ERROR_MARK)
11861{
11862 struct ix86_address parts;
11863 rtx base, index, disp;
11864 HOST_WIDE_INT scale;
11865 addr_space_t seg;
11866
11867 if (ix86_decompose_address (addr, &parts) == 0)
11868 /* Decomposition failed. */
11869 return false;
11870
11871 base = parts.base;
11872 index = parts.index;
11873 disp = parts.disp;
11874 scale = parts.scale;
11875 seg = parts.seg;
11876
11877 /* Validate base register. */
11878 if (base)
11879 {
11880 rtx reg = ix86_validate_address_register (base);
11881
11882 if (reg == NULL_RTX)
11883 return false;
11884
11885 unsigned int regno = REGNO (reg);
11886 if ((strict && !REGNO_OK_FOR_BASE_P (regno))
11887 || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
11888 /* Base is not valid. */
11889 return false;
11890 }
11891
11892 /* Validate index register. */
11893 if (index)
11894 {
11895 rtx reg = ix86_validate_address_register (index);
11896
11897 if (reg == NULL_RTX)
11898 return false;
11899
11900 unsigned int regno = REGNO (reg);
11901 if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
11902 || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
11903 /* Index is not valid. */
11904 return false;
11905 }
11906
11907 /* Index and base should have the same mode. */
11908 if (base && index
11909 && GET_MODE (base) != GET_MODE (index))
11910 return false;
11911
11912 /* Address override works only on the (%reg) part of %fs:(%reg). */
11913 if (seg != ADDR_SPACE_GENERIC
11914 && ((base && GET_MODE (base) != word_mode)
11915 || (index && GET_MODE (index) != word_mode)))
11916 return false;
11917
11918 /* Validate scale factor. */
11919 if (scale != 1)
11920 {
11921 if (!index)
11922 /* Scale without index. */
11923 return false;
11924
11925 if (scale != 2 && scale != 4 && scale != 8)
11926 /* Scale is not a valid multiplier. */
11927 return false;
11928 }
11929
11930 /* Validate displacement. */
11931 if (disp)
11932 {
11933 if (ix86_endbr_immediate_operand (disp, VOIDmode))
11934 return false;
11935
11936 if (GET_CODE (disp) == CONST
11937 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11938 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11939 switch (XINT (XEXP (disp, 0), 1))
11940 {
11941 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
11942 when used.  While the ABI also specifies 32bit relocations, we
11943 don't produce them at all and use IP-relative addressing instead.
11944 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
11945 should be loaded via the GOT. */
11946 case UNSPEC_GOT:
11947 if (!TARGET_64BIT
11948 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11949 goto is_legitimate_pic;
11950 /* FALLTHRU */
11951 case UNSPEC_GOTOFF:
11952 gcc_assert (flag_pic);
11953 if (!TARGET_64BIT)
11954 goto is_legitimate_pic;
11955
11956 /* 64bit address unspec. */
11957 return false;
11958
11959 case UNSPEC_GOTPCREL:
11960 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11961 goto is_legitimate_pic;
11962 /* FALLTHRU */
11963 case UNSPEC_PCREL:
11964 gcc_assert (flag_pic);
11965 goto is_legitimate_pic;
11966
11967 case UNSPEC_GOTTPOFF:
11968 case UNSPEC_GOTNTPOFF:
11969 case UNSPEC_INDNTPOFF:
11970 case UNSPEC_NTPOFF:
11971 case UNSPEC_DTPOFF:
11972 case UNSPEC_SECREL32:
11973 break;
11974
11975 default:
11976 /* Invalid address unspec. */
11977 return false;
11978 }
11979
11980 else if (SYMBOLIC_CONST (disp)
11981 && (flag_pic
11982#if TARGET_MACHO
11983 || (MACHOPIC_INDIRECT
11984 && !machopic_operand_p (disp))
11985#endif
11986 ))
11987 {
11988
11989 is_legitimate_pic:
11990 if (TARGET_64BIT && (index || base))
11991 {
11992 /* foo@dtpoff(%rX) is ok. */
11993 if (GET_CODE (disp) != CONST
11994 || GET_CODE (XEXP (disp, 0)) != PLUS
11995 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11996 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11997 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11998 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF
11999 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32))
12000 /* Non-constant pic memory reference. */
12001 return false;
12002 }
12003 else if ((!TARGET_MACHO || flag_pic)
12004 && ! legitimate_pic_address_disp_p (disp))
12005 /* Displacement is an invalid pic construct. */
12006 return false;
12007#if TARGET_MACHO
12008 else if (MACHO_DYNAMIC_NO_PIC_P
12009 && !ix86_legitimate_constant_p (Pmode, disp))
12010 /* displacement must be referenced via a non_lazy_pointer */
12011 return false;
12012#endif
12013
12014 /* This code used to verify that a symbolic pic displacement
12015 includes the pic_offset_table_rtx register.
12016
12017 While this is a good idea, unfortunately these constructs may
12018 be created by the "adds using lea" optimization for incorrect
12019 code like:
12020
12021 int a;
12022 int foo(int i)
12023 {
12024 return *(&a+i);
12025 }
12026
12027 This code is nonsensical, but results in addressing the
12028 GOT table with a pic_offset_table_rtx base.  We can't
12029 easily refuse it, since it gets matched by the
12030 "addsi3" pattern, which later gets split to an lea when the
12031 output register differs from the input.  While this
12032 could be handled by a separate addsi pattern for this case
12033 that never results in an lea, disabling this test seems to be
12034 the easier and correct fix for the crash. */
12035 }
12036 else if (GET_CODE (disp) != LABEL_REF
12037 && !CONST_INT_P (disp)
12038 && (GET_CODE (disp) != CONST
12039 || !ix86_legitimate_constant_p (Pmode, disp))
12040 && (GET_CODE (disp) != SYMBOL_REF
12041 || !ix86_legitimate_constant_p (Pmode, disp)))
12042 /* Displacement is not constant. */
12043 return false;
12044 else if (TARGET_64BIT
12045 && !x86_64_immediate_operand (disp, VOIDmode))
12046 /* Displacement is out of range. */
12047 return false;
12048 /* In x32 mode, constant addresses are sign extended to 64bit, so
12049 we have to prevent addresses from 0x80000000 to 0xffffffff. */
12050 else if (TARGET_X32 && !(index || base)
12051 && CONST_INT_P (disp)
12052 && val_signbit_known_set_p (SImode, INTVAL (disp)))
12053 return false;
12054 }
12055
12056 /* Everything looks valid. */
12057 return true;
12058}
12059
12060/* Determine if a given RTX is a valid constant address. */
12061
12062bool
12063constant_address_p (rtx x)
12064{
12065 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12066}
12067
12068
12069/* Return a legitimate reference for ORIG (an address) using the
12070 register REG. If REG is 0, a new pseudo is generated.
12071
12072 There are two types of references that must be handled:
12073
12074 1. Global data references must load the address from the GOT, via
12075 the PIC reg. An insn is emitted to do this load, and the reg is
12076 returned.
12077
12078 2. Static data references, constant pool addresses, and code labels
12079 compute the address as an offset from the GOT, whose base is in
12080 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12081 differentiate them from global data objects. The returned
12082 address is the PIC reg + an unspec constant.
12083
12084 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12085 reg also appears in the address. */
12086
12087rtx
12088legitimize_pic_address (rtx orig, rtx reg)
12089{
12090 rtx addr = orig;
12091 rtx new_rtx = orig;
12092
12093#if TARGET_MACHO
12094 if (TARGET_MACHO && !TARGET_64BIT)
12095 {
12096 if (reg == 0)
12097 reg = gen_reg_rtx (Pmode);
12098 /* Use the generic Mach-O PIC machinery. */
12099 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12100 }
12101#endif
12102
12103 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12104 {
12105#if TARGET_PECOFF
12106 rtx tmp = legitimize_pe_coff_symbol (addr, true);
12107 if (tmp)
12108 return tmp;
12109#endif
12110 }
12111
12112 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12113 new_rtx = addr;
12114 else if ((!TARGET_64BIT
12115 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
12116 && !TARGET_PECOFF
12117 && gotoff_operand (addr, Pmode))
12118 {
12119 /* This symbol may be referenced via a displacement
12120 from the PIC base address (@GOTOFF). */
12121 if (GET_CODE (addr) == CONST)
12122 addr = XEXP (addr, 0);
12123
12124 if (GET_CODE (addr) == PLUS)
12125 {
12126 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12127 UNSPEC_GOTOFF);
12128 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12129 }
12130 else
12131 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12132
12133 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12134
12135 if (TARGET_64BIT)
12136 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
12137
12138 if (reg != 0)
12139 {
12140 gcc_assert (REG_P (reg));
12141 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
12142 new_rtx, reg, 1, OPTAB_DIRECT);
12143 }
12144 else
12145 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12146 }
12147 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12148 /* We can't always use @GOTOFF for text labels
12149 on VxWorks, see gotoff_operand. */
12150 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12151 {
12152#if TARGET_PECOFF
12153 rtx tmp = legitimize_pe_coff_symbol (addr, true);
12154 if (tmp)
12155 return tmp;
12156#endif
12157
12158 /* For x64 PE-COFF there is no GOT table,
12159 so we use the address directly. */
12160 if (TARGET_64BIT && TARGET_PECOFF)
12161 {
12162 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12163 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12164 }
12165 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12166 {
12167 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
12168 UNSPEC_GOTPCREL);
12169 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12170 new_rtx = gen_const_mem (Pmode, new_rtx);
12171 set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
12172 }
12173 else
12174 {
12175 /* This symbol must be referenced via a load
12176 from the Global Offset Table (@GOT). */
12177 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12178 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12179
12180 if (TARGET_64BIT)
12181 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
12182
12183 if (reg != 0)
12184 {
12185 gcc_assert (REG_P (reg));
12186 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
12187 new_rtx, reg, 1, OPTAB_DIRECT);
12188 }
12189 else
12190 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12191
12192 new_rtx = gen_const_mem (Pmode, new_rtx);
12193 set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
12194 }
12195
12196 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
12197 }
12198 else
12199 {
12200 if (CONST_INT_P (addr)
12201 && !x86_64_immediate_operand (addr, VOIDmode))
12202 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
12203 else if (GET_CODE (addr) == CONST)
12204 {
12205 addr = XEXP (addr, 0);
12206
12207 /* We must match stuff we generate before. Assume the only
12208 unspecs that can get here are ours. Not that we could do
12209 anything with them anyway.... */
12210 if (GET_CODE (addr) == UNSPEC
12211 || (GET_CODE (addr) == PLUS
12212 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12213 return orig;
12214 gcc_assert (GET_CODE (addr) == PLUS);
12215 }
12216
12217 if (GET_CODE (addr) == PLUS)
12218 {
12219 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12220
12221 /* Check first to see if this is a constant
12222 offset from a @GOTOFF symbol reference. */
12223 if (!TARGET_PECOFF
12224 && gotoff_operand (op0, Pmode)
12225 && CONST_INT_P (op1))
12226 {
12227 if (!TARGET_64BIT)
12228 {
12229 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12230 UNSPEC_GOTOFF);
12231 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12232 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12233
12234 if (reg != 0)
12235 {
12236 gcc_assert (REG_P (reg));
12237 new_rtx = expand_simple_binop (Pmode, PLUS,
12238 pic_offset_table_rtx,
12239 new_rtx, reg, 1,
12240 OPTAB_DIRECT);
12241 }
12242 else
12243 new_rtx
12244 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12245 }
12246 else
12247 {
12248 if (INTVAL (op1) < -16*1024*1024
12249 || INTVAL (op1) >= 16*1024*1024)
12250 {
12251 if (!x86_64_immediate_operand (op1, Pmode))
12252 op1 = force_reg (Pmode, op1);
12253
12254 new_rtx
12255 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12256 }
12257 }
12258 }
12259 else
12260 {
12261 rtx base = legitimize_pic_address (op0, reg);
12262 machine_mode mode = GET_MODE (base);
12263 new_rtx
12264 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
12265
12266 if (CONST_INT_P (new_rtx))
12267 {
12268 if (INTVAL (new_rtx) < -16*1024*1024
12269 || INTVAL (new_rtx) >= 16*1024*1024)
12270 {
12271 if (!x86_64_immediate_operand (new_rtx, mode))
12272 new_rtx = force_reg (mode, new_rtx);
12273
12274 new_rtx
12275 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
12276 }
12277 else
12278 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
12279 }
12280 else
12281 {
12282 /* For %rip addressing, we have to use
12283 just disp32, not base nor index. */
12284 if (TARGET_64BIT
12285 && (GET_CODE (base) == SYMBOL_REF
12286 || GET_CODE (base) == LABEL_REF))
12287 base = force_reg (mode, base);
12288 if (GET_CODE (new_rtx) == PLUS
12289 && CONSTANT_P (XEXP (new_rtx, 1)))
12290 {
12291 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
12292 new_rtx = XEXP (new_rtx, 1);
12293 }
12294 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
12295 }
12296 }
12297 }
12298 }
12299 return new_rtx;
12300}
12301
12302/* Load the thread pointer. If TO_REG is true, force it into a register. */
12303
12304static rtx
12305get_thread_pointer (machine_mode tp_mode, bool to_reg)
12306{
12307 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12308
12309 if (GET_MODE (tp) != tp_mode)
12310 {
12311 gcc_assert (GET_MODE (tp) == SImode);
12312 gcc_assert (tp_mode == DImode);
12313
12314 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
12315 }
12316
12317 if (to_reg)
12318 tp = copy_to_mode_reg (tp_mode, tp);
12319
12320 return tp;
12321}
12322
12323/* Construct the SYMBOL_REF for the _tls_index symbol. */
12324
12325static GTY(()) rtx ix86_tls_index_symbol;
12326
12327#if TARGET_WIN32_TLS
12328static rtx
12329ix86_tls_index (void)
12330{
12331 if (!ix86_tls_index_symbol)
12332 ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index");
12333
12334 if (flag_pic)
12335 return gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_index_symbol), UNSPEC_PCREL));
12336 else
12337 return ix86_tls_index_symbol;
12338}
12339#endif
12340
12341/* Construct the SYMBOL_REF for the tls_get_addr function. */
12342
12343static GTY(()) rtx ix86_tls_symbol;
12344
12345static rtx
12346ix86_tls_get_addr (void)
12347{
12348 if (!ix86_tls_symbol)
12349 {
12350 const char *sym
12351 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12352 ? "___tls_get_addr" : "__tls_get_addr");
12353
12354 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12355 }
12356
12357 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
12358 {
12359 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
12360 UNSPEC_PLTOFF);
12361 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
12362 gen_rtx_CONST (Pmode, unspec));
12363 }
12364
12365 return ix86_tls_symbol;
12366}
12367
12368/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12369
12370static GTY(()) rtx ix86_tls_module_base_symbol;
12371
12372rtx
12373ix86_tls_module_base (void)
12374{
12375 if (!ix86_tls_module_base_symbol)
12376 {
12377 ix86_tls_module_base_symbol
12378 = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
12379
12380 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12381 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12382 }
12383
12384 return ix86_tls_module_base_symbol;
12385}
12386
12387/* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12388 false if we expect this to be used for a memory address and true if
12389 we expect to load the address into a register. */
12390
12391rtx
12392legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12393{
12394 rtx dest, base, off;
12395 rtx pic = NULL_RTX, tp = NULL_RTX;
12396 machine_mode tp_mode = Pmode;
12397 int type;
12398
12399#if TARGET_WIN32_TLS
12400 off = gen_const_mem (SImode, ix86_tls_index ());
12401 set_mem_alias_set (off, GOT_ALIAS_SET);
12402
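 /* Load ThreadLocalStoragePointer from the Windows TEB; it lives at
    offset 0x58 (88) in the 64-bit TEB and 0x2C (44) in the 32-bit TEB
    and is reached through the gs:/fs: segment set just below.  */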
12403 tp = gen_const_mem (Pmode, GEN_INT (TARGET_64BIT ? 88 : 44));
12404 set_mem_addr_space (tp, DEFAULT_TLS_SEG_REG);
12405
12406 if (TARGET_64BIT)
12407 off = convert_to_mode (Pmode, off, 1);
12408
12409 base = force_reg (Pmode, off);
12410 tp = copy_to_mode_reg (Pmode, tp);
12411
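 /* Index the per-thread TLS slot array by this module's _tls_index to
    obtain the base address of the module's TLS block.  */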
12412 tp = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, tp, gen_rtx_MULT (Pmode, base, GEN_INT (UNITS_PER_WORD))));
12413 set_mem_alias_set (tp, GOT_ALIAS_SET);
12414
12415 base = force_reg (Pmode, tp);
12416
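 /* The final address is the module's TLS block base plus the symbol's
    section-relative (SECREL32) offset, resolved at link time.  */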
12417 return gen_rtx_PLUS (Pmode, base, gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_SECREL32)));
12418#else
12419 /* Fall back to the global dynamic model if the toolchain cannot support local
12420 dynamic. */
12421 if (TARGET_SUN_TLS && !TARGET_64BIT
12422 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
12423 && model == TLS_MODEL_LOCAL_DYNAMIC)
12424 model = TLS_MODEL_GLOBAL_DYNAMIC;
12425
12426 switch (model)
12427 {
12428 case TLS_MODEL_GLOBAL_DYNAMIC:
12429 if (!TARGET_64BIT)
12430 {
12431 if (flag_pic && !TARGET_PECOFF)
12432 pic = pic_offset_table_rtx;
12433 else
12434 {
12435 pic = gen_reg_rtx (Pmode);
12436 emit_insn (gen_set_got (pic));
12437 }
12438 }
12439
12440 if (TARGET_GNU2_TLS)
12441 {
12442 dest = gen_reg_rtx (ptr_mode);
12443 if (TARGET_64BIT)
12444 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
12445 else
12446 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12447
12448 tp = get_thread_pointer (ptr_mode, true);
12449 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
12450 if (GET_MODE (dest) != Pmode)
12451 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
12452 dest = force_reg (Pmode, dest);
12453
12454 if (GET_MODE (x) != Pmode)
12455 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12456
12457 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12458 }
12459 else
12460 {
12461 rtx caddr = ix86_tls_get_addr ();
12462
12463 dest = gen_reg_rtx (Pmode);
12464 if (TARGET_64BIT)
12465 {
12466 rtx rax = gen_rtx_REG (Pmode, AX_REG);
12467 rtx_insn *insns;
12468
12469 start_sequence ();
12470 emit_call_insn
12471 (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
12472 insns = end_sequence ();
12473
12474 if (GET_MODE (x) != Pmode)
12475 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12476
12477 RTL_CONST_CALL_P (insns) = 1;
12478 emit_libcall_block (insns, dest, rax, x);
12479 }
12480 else
12481 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12482 }
12483 break;
12484
12485 case TLS_MODEL_LOCAL_DYNAMIC:
12486 if (!TARGET_64BIT)
12487 {
12488 if (flag_pic)
12489 pic = pic_offset_table_rtx;
12490 else
12491 {
12492 pic = gen_reg_rtx (Pmode);
12493 emit_insn (gen_set_got (pic));
12494 }
12495 }
12496
12497 if (TARGET_GNU2_TLS)
12498 {
12499 rtx tmp = ix86_tls_module_base ();
12500
12501 base = gen_reg_rtx (ptr_mode);
12502 if (TARGET_64BIT)
12503 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
12504 else
12505 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12506
12507 tp = get_thread_pointer (ptr_mode, true);
12508 if (GET_MODE (base) != Pmode)
12509 base = gen_rtx_ZERO_EXTEND (Pmode, base);
12510 base = force_reg (Pmode, base);
12511 }
12512 else
12513 {
12514 rtx caddr = ix86_tls_get_addr ();
12515
12516 base = gen_reg_rtx (Pmode);
12517 if (TARGET_64BIT)
12518 {
12519 rtx rax = gen_rtx_REG (Pmode, AX_REG);
12520 rtx_insn *insns;
12521 rtx eqv;
12522
12523 start_sequence ();
12524 emit_call_insn
12525 (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
12526 insns = end_sequence ();
12527
12528 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12529 share the LD_BASE result with other LD model accesses. */
12530 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12531 UNSPEC_TLS_LD_BASE);
12532
12533 RTL_CONST_CALL_P (insns) = 1;
12534 emit_libcall_block (insns, base, rax, eqv);
12535 }
12536 else
12537 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12538 }
12539
12540 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12541 off = gen_rtx_CONST (Pmode, off);
12542
12543 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12544
12545 if (TARGET_GNU2_TLS)
12546 {
12547 if (GET_MODE (tp) != Pmode)
12548 {
12549 dest = lowpart_subreg (ptr_mode, dest, Pmode);
12550 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
12551 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
12552 }
12553 else
12554 dest = gen_rtx_PLUS (Pmode, tp, dest);
12555 dest = force_reg (Pmode, dest);
12556
12557 if (GET_MODE (x) != Pmode)
12558 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12559
12560 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12561 }
12562 break;
12563
12564 case TLS_MODEL_INITIAL_EXEC:
12565 if (TARGET_64BIT)
12566 {
12567 /* Generate DImode references to avoid %fs:(%reg32)
12568 problems and linker IE->LE relaxation bug. */
12569 tp_mode = DImode;
12570 pic = NULL;
12571 type = UNSPEC_GOTNTPOFF;
12572 }
12573 else if (flag_pic)
12574 {
12575 pic = pic_offset_table_rtx;
12576 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12577 }
12578 else if (!TARGET_ANY_GNU_TLS)
12579 {
12580 pic = gen_reg_rtx (Pmode);
12581 emit_insn (gen_set_got (pic));
12582 type = UNSPEC_GOTTPOFF;
12583 }
12584 else
12585 {
12586 pic = NULL;
12587 type = UNSPEC_INDNTPOFF;
12588 }
12589
12590 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
12591 off = gen_rtx_CONST (tp_mode, off);
12592 if (pic)
12593 off = gen_rtx_PLUS (tp_mode, pic, off);
12594 off = gen_const_mem (tp_mode, off);
12595 set_mem_alias_set (off, GOT_ALIAS_SET);
12596
12597 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12598 {
12599 base = get_thread_pointer (tp_mode,
12600 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12601 off = force_reg (tp_mode, off);
12602 dest = gen_rtx_PLUS (tp_mode, base, off);
12603 if (tp_mode != Pmode)
12604 dest = convert_to_mode (Pmode, dest, 1);
12605 }
12606 else
12607 {
12608 base = get_thread_pointer (Pmode, true);
12609 dest = gen_reg_rtx (Pmode);
12610 emit_insn (gen_sub3_insn (dest, base, off));
12611 }
12612 break;
12613
12614 case TLS_MODEL_LOCAL_EXEC:
12615 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12616 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12617 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12618 off = gen_rtx_CONST (Pmode, off);
12619
12620 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12621 {
12622 base = get_thread_pointer (Pmode,
12623 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12624 return gen_rtx_PLUS (Pmode, base, off);
12625 }
12626 else
12627 {
12628 base = get_thread_pointer (Pmode, true);
12629 dest = gen_reg_rtx (Pmode);
12630 emit_insn (gen_sub3_insn (dest, base, off));
12631 }
12632 break;
12633
12634 default:
12635 gcc_unreachable ();
12636 }
12637
12638 return dest;
12639#endif
12640}
12641
12642/* Return true if the TLS address requires insn using integer registers.
12643 It's used to prevent KMOV/VMOV in TLS code sequences which require integer
12644 MOV instructions; see PR103275. */
12645bool
12646ix86_gpr_tls_address_pattern_p (rtx mem)
12647{
12648 gcc_assert (MEM_P (mem));
12649
12650 rtx addr = XEXP (mem, 0);
12651 subrtx_var_iterator::array_type array;
12652 FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
12653 {
12654 rtx op = *iter;
12655 if (GET_CODE (op) == UNSPEC)
12656 switch (XINT (op, 1))
12657 {
12658 case UNSPEC_GOTNTPOFF:
12659 return true;
12660 case UNSPEC_TPOFF:
12661 if (!TARGET_64BIT)
12662 return true;
12663 break;
12664 default:
12665 break;
12666 }
12667 }
12668
12669 return false;
12670}
12671
12672/* Return true if OP refers to a TLS address. */
12673bool
12674ix86_tls_address_pattern_p (rtx op)
12675{
12676 subrtx_var_iterator::array_type array;
12677 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
12678 {
12679 rtx op = *iter;
12680 if (MEM_P (op))
12681 {
12682 rtx *x = &XEXP (op, 0);
12683 while (GET_CODE (*x) == PLUS)
12684 {
12685 int i;
12686 for (i = 0; i < 2; i++)
12687 {
12688 rtx u = XEXP (*x, i);
12689 if (GET_CODE (u) == ZERO_EXTEND)
12690 u = XEXP (u, 0);
12691 if (GET_CODE (u) == UNSPEC
12692 && XINT (u, 1) == UNSPEC_TP)
12693 return true;
12694 }
12695 x = &XEXP (*x, 0);
12696 }
12697
12698 iter.skip_subrtxes ();
12699 }
12700 }
12701
12702 return false;
12703}
12704
12705/* Rewrite *LOC so that it refers to a default TLS address space. */
12706static void
12707ix86_rewrite_tls_address_1 (rtx *loc)
12708{
12709 subrtx_ptr_iterator::array_type array;
12710 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
12711 {
12712 rtx *loc = *iter;
12713 if (MEM_P (*loc))
12714 {
12715 rtx addr = XEXP (*loc, 0);
12716 rtx *x = &addr;
12717 while (GET_CODE (*x) == PLUS)
12718 {
12719 int i;
12720 for (i = 0; i < 2; i++)
12721 {
12722 rtx u = XEXP (*x, i);
12723 if (GET_CODE (u) == ZERO_EXTEND)
12724 u = XEXP (u, 0);
12725 if (GET_CODE (u) == UNSPEC
12726 && XINT (u, 1) == UNSPEC_TP)
12727 {
12728 /* NB: Since address override only applies to the
12729 (reg32) part in fs:(reg32), return if address
12730 override is used. */
12731 if (Pmode != word_mode
12732 && REG_P (XEXP (*x, 1 - i)))
12733 return;
12734
12735 addr_space_t as = DEFAULT_TLS_SEG_REG;
12736
12737 *x = XEXP (*x, 1 - i);
12738
12739 *loc = replace_equiv_address_nv (*loc, addr, true);
12740 set_mem_addr_space (*loc, as);
12741 return;
12742 }
12743 }
12744 x = &XEXP (*x, 0);
12745 }
12746
12747 iter.skip_subrtxes ();
12748 }
12749 }
12750}
12751
12752 /* Rewrite an instruction pattern involving a TLS address
12753 so that it refers to the default TLS address space. */
12754rtx
12755ix86_rewrite_tls_address (rtx pattern)
12756{
12757 pattern = copy_insn (pattern);
12758 ix86_rewrite_tls_address_1 (&pattern);
12759 return pattern;
12760}
12761
12762/* Try machine-dependent ways of modifying an illegitimate address
12763 to be legitimate. If we find one, return the new, valid address.
12764 This macro is used in only one place: `memory_address' in explow.cc.
12765
12766 OLDX is the address as it was before break_out_memory_refs was called.
12767 In some cases it is useful to look at this to decide what needs to be done.
12768
12769 It is always safe for this macro to do nothing. It exists to recognize
12770 opportunities to optimize the output.
12771
12772 For the 80386, we handle X+REG by loading X into a register R and
12773 using R+REG. R will go in a general reg and indexing will be used.
12774 However, if REG is a broken-out memory address or multiplication,
12775 nothing needs to be done because REG can certainly go in a general reg.
12776
12777 When -fpic is used, special handling is needed for symbolic references.
12778 See comments by legitimize_pic_address in i386.cc for details. */
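/* For instance, an address like
     (plus (mult (reg) (const_int 4)) (plus (reg) (const_int 8)))
   is typically reassociated below into
     (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8)),
   matching the base + index*scale + displacement form that
   ix86_legitimate_address_p expects.  */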
12779
12780static rtx
12781ix86_legitimize_address (rtx x, rtx, machine_mode mode)
12782{
12783 bool changed = false;
12784 unsigned log;
12785
12786 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12787 if (log)
12788 return legitimize_tls_address (x, (enum tls_model) log, false);
12789 if (GET_CODE (x) == CONST
12790 && GET_CODE (XEXP (x, 0)) == PLUS
12791 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12792 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12793 {
12794 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12795 (enum tls_model) log, false);
12796 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12797 }
12798
12799 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12800 {
12801#if TARGET_PECOFF
12802 rtx tmp = legitimize_pe_coff_symbol (x, true);
12803 if (tmp)
12804 return tmp;
12805#endif
12806 }
12807
12808 if (flag_pic && SYMBOLIC_CONST (x))
12809 return legitimize_pic_address (x, 0);
12810
12811#if TARGET_MACHO
12812 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12813 return machopic_indirect_data_reference (x, 0);
12814#endif
12815
12816 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12817 if (GET_CODE (x) == ASHIFT
12818 && CONST_INT_P (XEXP (x, 1))
12819 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12820 {
12821 changed = true;
12822 log = INTVAL (XEXP (x, 1));
12823 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12824 GEN_INT (1 << log));
12825 }
12826
12827 if (GET_CODE (x) == PLUS)
12828 {
12829 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12830
12831 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12832 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12833 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12834 {
12835 changed = true;
12836 log = INTVAL (XEXP (XEXP (x, 0), 1));
12837 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12838 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12839 GEN_INT (1 << log));
12840 }
12841
12842 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12843 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12844 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12845 {
12846 changed = true;
12847 log = INTVAL (XEXP (XEXP (x, 1), 1));
12848 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12849 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12850 GEN_INT (1 << log));
12851 }
12852
12853 /* Put multiply first if it isn't already. */
12854 if (GET_CODE (XEXP (x, 1)) == MULT)
12855 {
12856 std::swap (XEXP (x, 0), XEXP (x, 1));
12857 changed = true;
12858 }
12859
12860 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12861 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12862 created by virtual register instantiation, register elimination, and
12863 similar optimizations. */
12864 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12865 {
12866 changed = true;
12867 x = gen_rtx_PLUS (Pmode,
12868 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12869 XEXP (XEXP (x, 1), 0)),
12870 XEXP (XEXP (x, 1), 1));
12871 }
12872
12873 /* Canonicalize
12874 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12875 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12876 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12877 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12878 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12879 && CONSTANT_P (XEXP (x, 1)))
12880 {
12881 rtx constant;
12882 rtx other = NULL_RTX;
12883
12884 if (CONST_INT_P (XEXP (x, 1)))
12885 {
12886 constant = XEXP (x, 1);
12887 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12888 }
12889 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12890 {
12891 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12892 other = XEXP (x, 1);
12893 }
12894 else
12895 constant = 0;
12896
12897 if (constant)
12898 {
12899 changed = true;
12900 x = gen_rtx_PLUS (Pmode,
12901 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12902 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12903 plus_constant (Pmode, other,
12904 INTVAL (constant)));
12905 }
12906 }
12907
12908 if (changed && ix86_legitimate_address_p (mode, x, false))
12909 return x;
12910
12911 if (GET_CODE (XEXP (x, 0)) == MULT)
12912 {
12913 changed = true;
12914 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
12915 }
12916
12917 if (GET_CODE (XEXP (x, 1)) == MULT)
12918 {
12919 changed = true;
12920 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
12921 }
12922
12923 if (changed
12924 && REG_P (XEXP (x, 1))
12925 && REG_P (XEXP (x, 0)))
12926 return x;
12927
12928 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12929 {
12930 changed = true;
12931 x = legitimize_pic_address (x, 0);
12932 }
12933
12934 if (changed && ix86_legitimate_address_p (mode, x, false))
12935 return x;
12936
12937 if (REG_P (XEXP (x, 0)))
12938 {
12939 rtx temp = gen_reg_rtx (Pmode);
12940 rtx val = force_operand (XEXP (x, 1), temp);
12941 if (val != temp)
12942 {
12943 val = convert_to_mode (Pmode, val, 1);
12944 emit_move_insn (temp, val);
12945 }
12946
12947 XEXP (x, 1) = temp;
12948 return x;
12949 }
12950
12951 else if (REG_P (XEXP (x, 1)))
12952 {
12953 rtx temp = gen_reg_rtx (Pmode);
12954 rtx val = force_operand (XEXP (x, 0), temp);
12955 if (val != temp)
12956 {
12957 val = convert_to_mode (Pmode, val, 1);
12958 emit_move_insn (temp, val);
12959 }
12960
12961 XEXP (x, 0) = temp;
12962 return x;
12963 }
12964 }
12965
12966 return x;
12967}
12968
12969/* Print an integer constant expression in assembler syntax. Addition
12970 and subtraction are the only arithmetic that may appear in these
12971 expressions. FILE is the stdio stream to write to, X is the rtx, and
12972 CODE is the operand print code from the output string. */
12973
12974static void
12975output_pic_addr_const (FILE *file, rtx x, int code)
12976{
12977 char buf[256];
12978
12979 switch (GET_CODE (x))
12980 {
12981 case PC:
12982 gcc_assert (flag_pic);
12983 putc (c: '.', stream: file);
12984 break;
12985
12986 case SYMBOL_REF:
12987 if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
12988 output_addr_const (file, x);
12989 else
12990 {
12991 const char *name = XSTR (x, 0);
12992
12993 /* Mark the decl as referenced so that cgraph will
12994 output the function. */
12995 if (SYMBOL_REF_DECL (x))
12996 mark_decl_referenced (SYMBOL_REF_DECL (x));
12997
12998#if TARGET_MACHO
12999 if (MACHOPIC_INDIRECT
13000 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13001 name = machopic_indirection_name (x, /*stub_p=*/true);
13002#endif
13003 assemble_name (file, name);
13004 }
13005 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
13006 && code == 'P' && ix86_call_use_plt_p (x))
13007 fputs (s: "@PLT", stream: file);
13008 break;
13009
13010 case LABEL_REF:
13011 x = XEXP (x, 0);
13012 /* FALLTHRU */
13013 case CODE_LABEL:
13014 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13015 assemble_name (asm_out_file, buf);
13016 break;
13017
13018 CASE_CONST_SCALAR_INT:
13019 output_addr_const (file, x);
13020 break;
13021
13022 case CONST:
13023 /* This used to output parentheses around the expression,
13024 but that does not work on the 386 (either ATT or BSD assembler). */
13025 output_pic_addr_const (file, XEXP (x, 0), code);
13026 break;
13027
13028 case CONST_DOUBLE:
13029 /* We can't handle floating point constants;
13030 TARGET_PRINT_OPERAND must handle them. */
13031 output_operand_lossage ("floating constant misused");
13032 break;
13033
13034 case PLUS:
13035 /* Some assemblers need integer constants to appear first. */
13036 if (CONST_INT_P (XEXP (x, 0)))
13037 {
13038 output_pic_addr_const (file, XEXP (x, 0), code);
13039 putc (c: '+', stream: file);
13040 output_pic_addr_const (file, XEXP (x, 1), code);
13041 }
13042 else
13043 {
13044 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13045 output_pic_addr_const (file, XEXP (x, 1), code);
13046 putc (c: '+', stream: file);
13047 output_pic_addr_const (file, XEXP (x, 0), code);
13048 }
13049 break;
13050
13051 case MINUS:
13052 if (!TARGET_MACHO)
13053 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', stream: file);
13054 output_pic_addr_const (file, XEXP (x, 0), code);
13055 putc (c: '-', stream: file);
13056 output_pic_addr_const (file, XEXP (x, 1), code);
13057 if (!TARGET_MACHO)
13058 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', stream: file);
13059 break;
13060
13061 case UNSPEC:
13062 gcc_assert (XVECLEN (x, 0) == 1);
13063 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13064 switch (XINT (x, 1))
13065 {
13066 case UNSPEC_GOT:
13067 fputs (s: "@GOT", stream: file);
13068 break;
13069 case UNSPEC_GOTOFF:
13070 fputs (s: "@GOTOFF", stream: file);
13071 break;
13072 case UNSPEC_PLTOFF:
13073 fputs (s: "@PLTOFF", stream: file);
13074 break;
13075 case UNSPEC_PCREL:
13076 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13077 "(%rip)" : "[rip]", stream: file);
13078 break;
13079 case UNSPEC_GOTPCREL:
13080 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13081 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", stream: file);
13082 break;
13083 case UNSPEC_GOTTPOFF:
13084 /* FIXME: This might be @TPOFF in Sun ld too. */
13085 fputs (s: "@gottpoff", stream: file);
13086 break;
13087 case UNSPEC_TPOFF:
13088 fputs (s: "@tpoff", stream: file);
13089 break;
13090 case UNSPEC_NTPOFF:
13091 if (TARGET_64BIT)
13092 fputs (s: "@tpoff", stream: file);
13093 else
13094 fputs (s: "@ntpoff", stream: file);
13095 break;
13096 case UNSPEC_DTPOFF:
13097 fputs (s: "@dtpoff", stream: file);
13098 break;
13099 case UNSPEC_GOTNTPOFF:
13100 if (TARGET_64BIT)
13101 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13102 "@gottpoff(%rip)": "@gottpoff[rip]", stream: file);
13103 else
13104 fputs (s: "@gotntpoff", stream: file);
13105 break;
13106 case UNSPEC_INDNTPOFF:
13107 fputs (s: "@indntpoff", stream: file);
13108 break;
13109 case UNSPEC_SECREL32:
13110 fputs (s: "@secrel32", stream: file);
13111 break;
13112#if TARGET_MACHO
13113 case UNSPEC_MACHOPIC_OFFSET:
13114 putc ('-', file);
13115 machopic_output_function_base_name (file);
13116 break;
13117#endif
13118 default:
13119 output_operand_lossage ("invalid UNSPEC as operand");
13120 break;
13121 }
13122 break;
13123
13124 default:
13125 output_operand_lossage ("invalid expression as operand");
13126 }
13127}
13128
13129/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13130 We need to emit DTP-relative relocations. */
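/* Roughly speaking, for SIZE == 4 this emits something like
   ".long sym@dtpoff" (or "@secrel32" for TARGET_WIN32_TLS), and for
   SIZE == 8 the same directive followed by ", 0".  */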
13131
13132static void ATTRIBUTE_UNUSED
13133i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13134{
13135 fputs (ASM_LONG, stream: file);
13136 output_addr_const (file, x);
13137#if TARGET_WIN32_TLS
13138 fputs ("@secrel32", file);
13139#else
13140 fputs (s: "@dtpoff", stream: file);
13141#endif
13142 switch (size)
13143 {
13144 case 4:
13145 break;
13146 case 8:
13147 fputs (s: ", 0", stream: file);
13148 break;
13149 default:
13150 gcc_unreachable ();
13151 }
13152}
13153
13154/* Return true if X is a representation of the PIC register. This copes
13155 with calls from ix86_find_base_term, where the register might have
13156 been replaced by a cselib value. */
13157
13158static bool
13159ix86_pic_register_p (rtx x)
13160{
13161 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13162 return (pic_offset_table_rtx
13163 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13164 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
13165 return true;
13166 else if (!REG_P (x))
13167 return false;
13168 else if (pic_offset_table_rtx)
13169 {
13170 if (REGNO (x) == REGNO (pic_offset_table_rtx))
13171 return true;
13172 if (HARD_REGISTER_P (x)
13173 && !HARD_REGISTER_P (pic_offset_table_rtx)
13174 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
13175 return true;
13176 return false;
13177 }
13178 else
13179 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13180}
13181
13182/* Helper function for ix86_delegitimize_address.
13183 Attempt to delegitimize TLS local-exec accesses. */
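/* E.g. an access through the TLS segment register whose address
   decomposes with seg == DEFAULT_TLS_SEG_REG and an UNSPEC_NTPOFF
   displacement is mapped back to the bare SYMBOL_REF, with any base,
   index and constant addend re-applied.  */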
13184
13185static rtx
13186ix86_delegitimize_tls_address (rtx orig_x)
13187{
13188 rtx x = orig_x, unspec;
13189 struct ix86_address addr;
13190
13191 if (!TARGET_TLS_DIRECT_SEG_REFS)
13192 return orig_x;
13193 if (MEM_P (x))
13194 x = XEXP (x, 0);
13195 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13196 return orig_x;
13197 if (ix86_decompose_address (x, &addr) == 0
13198 || addr.seg != DEFAULT_TLS_SEG_REG
13199 || addr.disp == NULL_RTX
13200 || GET_CODE (addr.disp) != CONST)
13201 return orig_x;
13202 unspec = XEXP (addr.disp, 0);
13203 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13204 unspec = XEXP (unspec, 0);
13205 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13206 return orig_x;
13207 x = XVECEXP (unspec, 0, 0);
13208 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13209 if (unspec != XEXP (addr.disp, 0))
13210 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13211 if (addr.index)
13212 {
13213 rtx idx = addr.index;
13214 if (addr.scale != 1)
13215 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13216 x = gen_rtx_PLUS (Pmode, idx, x);
13217 }
13218 if (addr.base)
13219 x = gen_rtx_PLUS (Pmode, addr.base, x);
13220 if (MEM_P (orig_x))
13221 x = replace_equiv_address_nv (orig_x, x);
13222 return x;
13223}
13224
13225/* In the name of slightly smaller debug output, and to cater to
13226 general assembler lossage, recognize PIC+GOTOFF and turn it back
13227 into a direct symbol reference.
13228
13229 On Darwin, this is necessary to avoid a crash, because Darwin
13230 has a different PIC label for each routine but the DWARF debugging
13231 information is not associated with any particular routine, so it's
13232 necessary to remove references to the PIC label from RTL stored by
13233 the DWARF output code.
13234
13235 This helper is used in the normal ix86_delegitimize_address
13236 entrypoint (e.g. used in the target delegitimization hook) and
13237 in ix86_find_base_term. As a compile-time memory optimization, we
13238 avoid allocating rtxes that would not change the outcome for the
13239 callers (find_base_value and find_base_term). */
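/* For example, a 32-bit PIC reference such as
     (plus (reg:SI ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is normally turned back into the plain (symbol_ref "foo"), with any
   constant addend re-applied.  */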
13240
13241static inline rtx
13242ix86_delegitimize_address_1 (rtx x, bool base_term_p)
13243{
13244 rtx orig_x = delegitimize_mem_from_attrs (x);
13245 /* addend is NULL or some rtx if x is something+GOTOFF where
13246 something doesn't include the PIC register. */
13247 rtx addend = NULL_RTX;
13248 /* reg_addend is NULL or a multiple of some register. */
13249 rtx reg_addend = NULL_RTX;
13250 /* const_addend is NULL or a const_int. */
13251 rtx const_addend = NULL_RTX;
13252 /* This is the result, or NULL. */
13253 rtx result = NULL_RTX;
13254
13255 x = orig_x;
13256
13257 if (MEM_P (x))
13258 x = XEXP (x, 0);
13259
13260 if (TARGET_64BIT)
13261 {
13262 if (GET_CODE (x) == CONST
13263 && GET_CODE (XEXP (x, 0)) == PLUS
13264 && GET_MODE (XEXP (x, 0)) == Pmode
13265 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13266 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
13267 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
13268 {
13269 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
13270 base. A CONST can't be arg_pointer_rtx based. */
13271 if (base_term_p && MEM_P (orig_x))
13272 return orig_x;
13273 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
13274 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
13275 if (MEM_P (orig_x))
13276 x = replace_equiv_address_nv (orig_x, x);
13277 return x;
13278 }
13279
13280 if (GET_CODE (x) == CONST
13281 && GET_CODE (XEXP (x, 0)) == UNSPEC
13282 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
13283 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
13284 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
13285 {
13286 x = XVECEXP (XEXP (x, 0), 0, 0);
13287 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
13288 {
13289 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
13290 if (x == NULL_RTX)
13291 return orig_x;
13292 }
13293 return x;
13294 }
13295
13296 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
13297 return ix86_delegitimize_tls_address (orig_x);
13298
13299 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
13300 and -mcmodel=medium -fpic. */
13301 }
13302
13303 if (GET_CODE (x) != PLUS
13304 || GET_CODE (XEXP (x, 1)) != CONST)
13305 return ix86_delegitimize_tls_address (orig_x);
13306
13307 if (ix86_pic_register_p (XEXP (x, 0)))
13308 /* %ebx + GOT/GOTOFF */
13309 ;
13310 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13311 {
13312 /* %ebx + %reg * scale + GOT/GOTOFF */
13313 reg_addend = XEXP (x, 0);
13314 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13315 reg_addend = XEXP (reg_addend, 1);
13316 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13317 reg_addend = XEXP (reg_addend, 0);
13318 else
13319 {
13320 reg_addend = NULL_RTX;
13321 addend = XEXP (x, 0);
13322 }
13323 }
13324 else
13325 addend = XEXP (x, 0);
13326
13327 x = XEXP (XEXP (x, 1), 0);
13328 if (GET_CODE (x) == PLUS
13329 && CONST_INT_P (XEXP (x, 1)))
13330 {
13331 const_addend = XEXP (x, 1);
13332 x = XEXP (x, 0);
13333 }
13334
13335 if (GET_CODE (x) == UNSPEC
13336 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13337 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
13338 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
13339 && !MEM_P (orig_x) && !addend)))
13340 result = XVECEXP (x, 0, 0);
13341
13342 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
13343 && !MEM_P (orig_x))
13344 result = XVECEXP (x, 0, 0);
13345
13346 if (! result)
13347 return ix86_delegitimize_tls_address (orig_x);
13348
13349 /* For (PLUS something CONST_INT) both find_base_{value,term} just
13350 recurse on the first operand. */
13351 if (const_addend && !base_term_p)
13352 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13353 if (reg_addend)
13354 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13355 if (addend)
13356 {
13357 /* If the rest of original X doesn't involve the PIC register, add
13358 addend and subtract pic_offset_table_rtx. This can happen e.g.
13359 for code like:
13360 leal (%ebx, %ecx, 4), %ecx
13361 ...
13362 movl foo@GOTOFF(%ecx), %edx
13363 in which case we return (%ecx - %ebx) + foo
13364 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
13365 and reload has completed. Don't do the latter for debug,
13366 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
13367 if (pic_offset_table_rtx
13368 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
13369 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13370 pic_offset_table_rtx),
13371 result);
13372 else if (base_term_p
13373 && pic_offset_table_rtx
13374 && !TARGET_MACHO
13375 && !TARGET_VXWORKS_RTP)
13376 {
13377 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
13378 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
13379 result = gen_rtx_PLUS (Pmode, tmp, result);
13380 }
13381 else
13382 return orig_x;
13383 }
13384 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13385 {
13386 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
13387 if (result == NULL_RTX)
13388 return orig_x;
13389 }
13390 return result;
13391}
13392
13393/* The normal instantiation of the above template. */
13394
13395static rtx
13396ix86_delegitimize_address (rtx x)
13397{
13398 return ix86_delegitimize_address_1 (x, false);
13399}
13400
13401/* If X is a machine specific address (i.e. a symbol or label being
13402 referenced as a displacement from the GOT implemented using an
13403 UNSPEC), then return the base term. Otherwise return X. */
13404
13405rtx
13406ix86_find_base_term (rtx x)
13407{
13408 rtx term;
13409
13410 if (TARGET_64BIT)
13411 {
13412 if (GET_CODE (x) != CONST)
13413 return x;
13414 term = XEXP (x, 0);
13415 if (GET_CODE (term) == PLUS
13416 && CONST_INT_P (XEXP (term, 1)))
13417 term = XEXP (term, 0);
13418 if (GET_CODE (term) != UNSPEC
13419 || (XINT (term, 1) != UNSPEC_GOTPCREL
13420 && XINT (term, 1) != UNSPEC_PCREL))
13421 return x;
13422
13423 return XVECEXP (term, 0, 0);
13424 }
13425
13426 return ix86_delegitimize_address_1 (x, true);
13427}
13428
13429/* Return true if X shouldn't be emitted into the debug info.
13430 Disallow UNSPECs other than @gotoff - we can't easily emit the
13431 _GLOBAL_OFFSET_TABLE_ symbol into the .debug_info section, so instead
13432 of delegitimizing we assemble it as @gotoff.
13433 Disallow the _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
13434 assembles that as the _GLOBAL_OFFSET_TABLE_-. expression. */
13435
13436static bool
13437ix86_const_not_ok_for_debug_p (rtx x)
13438{
13439 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
13440 return true;
13441
13442 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
13443 return true;
13444
13445 return false;
13446}
13447
13448static void
13449put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
13450 bool fp, FILE *file)
13451{
13452 const char *suffix;
13453
13454 if (mode == CCFPmode)
13455 {
13456 code = ix86_fp_compare_code_to_integer (code);
13457 mode = CCmode;
13458 }
13459 if (reverse)
13460 code = reverse_condition (code);
13461
13462 switch (code)
13463 {
13464 case EQ:
13465 gcc_assert (mode != CCGZmode);
13466 switch (mode)
13467 {
13468 case E_CCAmode:
13469 suffix = "a";
13470 break;
13471 case E_CCCmode:
13472 suffix = "c";
13473 break;
13474 case E_CCOmode:
13475 suffix = "o";
13476 break;
13477 case E_CCPmode:
13478 suffix = "p";
13479 break;
13480 case E_CCSmode:
13481 suffix = "s";
13482 break;
13483 default:
13484 suffix = "e";
13485 break;
13486 }
13487 break;
13488 case NE:
13489 gcc_assert (mode != CCGZmode);
13490 switch (mode)
13491 {
13492 case E_CCAmode:
13493 suffix = "na";
13494 break;
13495 case E_CCCmode:
13496 suffix = "nc";
13497 break;
13498 case E_CCOmode:
13499 suffix = "no";
13500 break;
13501 case E_CCPmode:
13502 suffix = "np";
13503 break;
13504 case E_CCSmode:
13505 suffix = "ns";
13506 break;
13507 default:
13508 suffix = "ne";
13509 break;
13510 }
13511 break;
13512 case GT:
13513 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13514 suffix = "g";
13515 break;
13516 case GTU:
13517 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13518 Those same assemblers have the same but opposite lossage on cmov. */
13519 if (mode == CCmode)
13520 suffix = fp ? "nbe" : "a";
13521 else
13522 gcc_unreachable ();
13523 break;
13524 case LT:
13525 switch (mode)
13526 {
13527 case E_CCNOmode:
13528 case E_CCGOCmode:
13529 suffix = "s";
13530 break;
13531
13532 case E_CCmode:
13533 case E_CCGCmode:
13534 case E_CCGZmode:
13535 suffix = "l";
13536 break;
13537
13538 default:
13539 gcc_unreachable ();
13540 }
13541 break;
13542 case LTU:
13543 if (mode == CCmode || mode == CCGZmode)
13544 suffix = "b";
13545 else if (mode == CCCmode)
13546 suffix = fp ? "b" : "c";
13547 else
13548 gcc_unreachable ();
13549 break;
13550 case GE:
13551 switch (mode)
13552 {
13553 case E_CCNOmode:
13554 case E_CCGOCmode:
13555 suffix = "ns";
13556 break;
13557
13558 case E_CCmode:
13559 case E_CCGCmode:
13560 case E_CCGZmode:
13561 suffix = "ge";
13562 break;
13563
13564 default:
13565 gcc_unreachable ();
13566 }
13567 break;
13568 case GEU:
13569 if (mode == CCmode || mode == CCGZmode)
13570 suffix = "nb";
13571 else if (mode == CCCmode)
13572 suffix = fp ? "nb" : "nc";
13573 else
13574 gcc_unreachable ();
13575 break;
13576 case LE:
13577 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13578 suffix = "le";
13579 break;
13580 case LEU:
13581 if (mode == CCmode)
13582 suffix = "be";
13583 else
13584 gcc_unreachable ();
13585 break;
13586 case UNORDERED:
13587 suffix = fp ? "u" : "p";
13588 break;
13589 case ORDERED:
13590 suffix = fp ? "nu" : "np";
13591 break;
13592 default:
13593 gcc_unreachable ();
13594 }
13595 fputs (s: suffix, stream: file);
13596}
13597
13598/* Print the name of register X to FILE based on its machine mode and number.
13599 If CODE is 'w', pretend the mode is HImode.
13600 If CODE is 'b', pretend the mode is QImode.
13601 If CODE is 'k', pretend the mode is SImode.
13602 If CODE is 'q', pretend the mode is DImode.
13603 If CODE is 'x', pretend the mode is V4SFmode.
13604 If CODE is 't', pretend the mode is V8SFmode.
13605 If CODE is 'g', pretend the mode is V16SFmode.
13606 If CODE is 'h', pretend the reg is the 'high' byte register.
13607 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13608 If CODE is 'd', duplicate the operand for AVX instruction.
13609 If CODE is 'V', print naked full integer register name without %.
13610 */
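/* E.g. for the hard register AX in AT&T syntax on a 64-bit target, code 'q'
   prints "%rax", 'k' prints "%eax", 'w' prints "%ax", 'b' prints "%al" and
   'h' prints "%ah".  */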
13611
13612void
13613print_reg (rtx x, int code, FILE *file)
13614{
13615 const char *reg;
13616 int msize;
13617 unsigned int regno;
13618 bool duplicated;
13619
13620 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
13621 putc (c: '%', stream: file);
13622
13623 if (x == pc_rtx)
13624 {
13625 gcc_assert (TARGET_64BIT);
13626 fputs (s: "rip", stream: file);
13627 return;
13628 }
13629
13630 if (code == 'y' && STACK_TOP_P (x))
13631 {
13632 fputs (s: "st(0)", stream: file);
13633 return;
13634 }
13635
13636 if (code == 'w')
13637 msize = 2;
13638 else if (code == 'b')
13639 msize = 1;
13640 else if (code == 'k')
13641 msize = 4;
13642 else if (code == 'q')
13643 msize = 8;
13644 else if (code == 'h')
13645 msize = 0;
13646 else if (code == 'x')
13647 msize = 16;
13648 else if (code == 't')
13649 msize = 32;
13650 else if (code == 'g')
13651 msize = 64;
13652 else
13653 msize = GET_MODE_SIZE (GET_MODE (x));
13654
13655 regno = REGNO (x);
13656
13657 if (regno == ARG_POINTER_REGNUM
13658 || regno == FRAME_POINTER_REGNUM
13659 || regno == FPSR_REG)
13660 {
13661 output_operand_lossage
13662 ("invalid use of register '%s'", reg_names[regno]);
13663 return;
13664 }
13665 else if (regno == FLAGS_REG)
13666 {
13667 output_operand_lossage ("invalid use of asm flag output");
13668 return;
13669 }
13670
13671 if (code == 'V')
13672 {
13673 if (GENERAL_REGNO_P (regno))
13674 msize = GET_MODE_SIZE (word_mode);
13675 else
13676 error ("%<V%> modifier on non-integer register");
13677 }
13678
13679 duplicated = code == 'd' && TARGET_AVX;
13680
13681 switch (msize)
13682 {
13683 case 16:
13684 case 12:
13685 case 8:
13686 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
13687 warning (0, "unsupported size for integer register");
13688 /* FALLTHRU */
13689 case 4:
13690 if (LEGACY_INT_REGNO_P (regno))
13691 putc (c: msize > 4 && TARGET_64BIT ? 'r' : 'e', stream: file);
13692 /* FALLTHRU */
13693 case 2:
13694 normal:
13695 reg = hi_reg_name[regno];
13696 break;
13697 case 1:
13698 if (regno >= ARRAY_SIZE (qi_reg_name))
13699 goto normal;
13700 if (!ANY_QI_REGNO_P (regno))
13701 error ("unsupported size for integer register");
13702 reg = qi_reg_name[regno];
13703 break;
13704 case 0:
13705 if (regno >= ARRAY_SIZE (qi_high_reg_name))
13706 goto normal;
13707 reg = qi_high_reg_name[regno];
13708 break;
13709 case 32:
13710 case 64:
13711 if (SSE_REGNO_P (regno))
13712 {
13713 gcc_assert (!duplicated);
13714 putc (c: msize == 32 ? 'y' : 'z', stream: file);
13715 reg = hi_reg_name[regno] + 1;
13716 break;
13717 }
13718 goto normal;
13719 default:
13720 gcc_unreachable ();
13721 }
13722
13723 fputs (s: reg, stream: file);
13724
13725 /* Irritatingly, AMD extended registers use a
13726 different naming convention: "r%d[bwd]". */
13727 if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
13728 {
13729 gcc_assert (TARGET_64BIT);
13730 switch (msize)
13731 {
13732 case 0:
13733 error ("extended registers have no high halves");
13734 break;
13735 case 1:
13736 putc (c: 'b', stream: file);
13737 break;
13738 case 2:
13739 putc (c: 'w', stream: file);
13740 break;
13741 case 4:
13742 putc (c: 'd', stream: file);
13743 break;
13744 case 8:
13745 /* no suffix */
13746 break;
13747 default:
13748 error ("unsupported operand size for extended register");
13749 break;
13750 }
13751 return;
13752 }
13753
13754 if (duplicated)
13755 {
13756 if (ASSEMBLER_DIALECT == ASM_ATT)
13757 fprintf (stream: file, format: ", %%%s", reg);
13758 else
13759 fprintf (stream: file, format: ", %s", reg);
13760 }
13761}
13762
13763/* Meaning of CODE:
13764 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13765 C -- print opcode suffix for set/cmov insn.
13766 c -- like C, but print reversed condition
13767 F,f -- likewise, but for floating-point.
13768 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13769 otherwise nothing
13770 R -- print embedded rounding and sae.
13771 r -- print only sae.
13772 z -- print the opcode suffix for the size of the current operand.
13773 Z -- likewise, with special suffixes for x87 instructions.
13774 * -- print a star (in certain assembler syntax)
13775 A -- print an absolute memory reference.
13776 E -- print address with DImode register names if TARGET_64BIT.
13777 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13778 s -- print a shift double count, followed by the assembler's argument
13779 delimiter.
13780 b -- print the QImode name of the register for the indicated operand.
13781 %b0 would print %al if operands[0] is reg 0.
13782 w -- likewise, print the HImode name of the register.
13783 k -- likewise, print the SImode name of the register.
13784 q -- likewise, print the DImode name of the register.
13785 x -- likewise, print the V4SFmode name of the register.
13786 t -- likewise, print the V8SFmode name of the register.
13787 g -- likewise, print the V16SFmode name of the register.
13788 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13789 y -- print "st(0)" instead of "st" as a register.
13790 d -- print duplicated register operand for AVX instruction.
13791 D -- print condition for SSE cmp instruction.
13792 P -- if PIC, print an @PLT suffix. For -fno-plt, load function
13793 address from GOT.
13794 p -- print raw symbol name.
13795 X -- don't print any sort of PIC '@' suffix for a symbol.
13796 & -- print some in-use local-dynamic symbol name.
13797 H -- print a memory address offset by 8; used for sse high-parts
13798 Y -- print condition for XOP pcom* instruction.
13799 V -- print naked full integer register name without %.
13800 v -- print segment override prefix
13801 + -- print a branch hint as 'cs' or 'ds' prefix
13802 ; -- print a semicolon (after prefixes due to bug in older gas).
13803 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13804 ^ -- print addr32 prefix if Pmode != word_mode
13805 M -- print addr32 prefix for TARGET_X32 with VSIB address.
13806 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
13807 N -- print maskz if it's constant 0 operand.
13808 G -- print embedded flag for ccmp/ctest.
13809 */
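/* Illustratively, in an insn output template %z0 appends the integer size
   suffix ('b', 'w', 'l' or 'q') derived from operand 0's mode, %k1 prints
   the SImode name of the register in operand 1, and %~ prints "i" or "f"
   depending on TARGET_AVX2.  */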
13810
13811void
13812ix86_print_operand (FILE *file, rtx x, int code)
13813{
13814 if (code)
13815 {
13816 switch (code)
13817 {
13818 case 'A':
13819 switch (ASSEMBLER_DIALECT)
13820 {
13821 case ASM_ATT:
13822 putc (c: '*', stream: file);
13823 break;
13824
13825 case ASM_INTEL:
13826 /* Intel syntax. For absolute addresses, registers should not
13827 be surrounded by braces. */
13828 if (!REG_P (x))
13829 {
13830 putc ('[', file);
13831 ix86_print_operand (file, x, 0);
13832 putc (']', file);
13833 return;
13834 }
13835 break;
13836
13837 default:
13838 gcc_unreachable ();
13839 }
13840
13841 ix86_print_operand (file, x, 0);
13842 return;
13843
13844 case 'E':
13845 /* Wrap address in an UNSPEC to declare special handling. */
13846 if (TARGET_64BIT)
13847 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
13848
13849 output_address (VOIDmode, x);
13850 return;
13851
13852 case 'L':
13853 if (ASSEMBLER_DIALECT == ASM_ATT)
13854 putc (c: 'l', stream: file);
13855 return;
13856
13857 case 'W':
13858 if (ASSEMBLER_DIALECT == ASM_ATT)
13859 putc (c: 'w', stream: file);
13860 return;
13861
13862 case 'B':
13863 if (ASSEMBLER_DIALECT == ASM_ATT)
13864 putc (c: 'b', stream: file);
13865 return;
13866
13867 case 'Q':
13868 if (ASSEMBLER_DIALECT == ASM_ATT)
13869 putc (c: 'l', stream: file);
13870 return;
13871
13872 case 'S':
13873 if (ASSEMBLER_DIALECT == ASM_ATT)
13874 putc (c: 's', stream: file);
13875 return;
13876
13877 case 'T':
13878 if (ASSEMBLER_DIALECT == ASM_ATT)
13879 putc (c: 't', stream: file);
13880 return;
13881
13882 case 'O':
13883#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13884 if (ASSEMBLER_DIALECT != ASM_ATT)
13885 return;
13886
13887 switch (GET_MODE_SIZE (GET_MODE (x)))
13888 {
13889 case 2:
13890 putc ('w', file);
13891 break;
13892
13893 case 4:
13894 putc ('l', file);
13895 break;
13896
13897 case 8:
13898 putc ('q', file);
13899 break;
13900
13901 default:
13902 output_operand_lossage ("invalid operand size for operand "
13903 "code 'O'");
13904 return;
13905 }
13906
13907 putc ('.', file);
13908#endif
13909 return;
13910
13911 case 'z':
13912 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13913 {
13914 /* Opcodes don't get size suffixes if using Intel opcodes. */
13915 if (ASSEMBLER_DIALECT == ASM_INTEL)
13916 return;
13917
13918 switch (GET_MODE_SIZE (GET_MODE (x)))
13919 {
13920 case 1:
13921 putc (c: 'b', stream: file);
13922 return;
13923
13924 case 2:
13925 putc (c: 'w', stream: file);
13926 return;
13927
13928 case 4:
13929 putc (c: 'l', stream: file);
13930 return;
13931
13932 case 8:
13933 putc (c: 'q', stream: file);
13934 return;
13935
13936 default:
13937 output_operand_lossage ("invalid operand size for operand "
13938 "code 'z'");
13939 return;
13940 }
13941 }
13942
13943 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13944 {
13945 if (this_is_asm_operands)
13946 warning_for_asm (this_is_asm_operands,
13947 "non-integer operand used with operand code %<z%>");
13948 else
13949 warning (0, "non-integer operand used with operand code %<z%>");
13950 }
13951 /* FALLTHRU */
13952
13953 case 'Z':
13954 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13955 if (ASSEMBLER_DIALECT == ASM_INTEL)
13956 return;
13957
13958 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13959 {
13960 switch (GET_MODE_SIZE (GET_MODE (x)))
13961 {
13962 case 2:
13963#ifdef HAVE_AS_IX86_FILDS
13964 putc (c: 's', stream: file);
13965#endif
13966 return;
13967
13968 case 4:
13969 putc (c: 'l', stream: file);
13970 return;
13971
13972 case 8:
13973#ifdef HAVE_AS_IX86_FILDQ
13974 putc (c: 'q', stream: file);
13975#else
13976 fputs ("ll", file);
13977#endif
13978 return;
13979
13980 default:
13981 break;
13982 }
13983 }
13984 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13985 {
13986 /* 387 opcodes don't get size suffixes
13987 if the operands are registers. */
13988 if (STACK_REG_P (x))
13989 return;
13990
13991 switch (GET_MODE_SIZE (GET_MODE (x)))
13992 {
13993 case 4:
13994 putc (c: 's', stream: file);
13995 return;
13996
13997 case 8:
13998 putc (c: 'l', stream: file);
13999 return;
14000
14001 case 12:
14002 case 16:
14003 putc (c: 't', stream: file);
14004 return;
14005
14006 default:
14007 break;
14008 }
14009 }
14010 else
14011 {
14012 output_operand_lossage ("invalid operand type used with "
14013 "operand code '%c'", code);
14014 return;
14015 }
14016
14017 output_operand_lossage ("invalid operand size for operand code '%c'",
14018 code);
14019 return;
14020
14021 case 'd':
14022 case 'b':
14023 case 'w':
14024 case 'k':
14025 case 'q':
14026 case 'h':
14027 case 't':
14028 case 'g':
14029 case 'y':
14030 case 'x':
14031 case 'X':
14032 case 'P':
14033 case 'p':
14034 case 'V':
14035 break;
14036
14037 case 's':
14038 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14039 {
14040 ix86_print_operand (file, x, code: 0);
14041 fputs (s: ", ", stream: file);
14042 }
14043 return;
14044
14045 case 'Y':
14046 switch (GET_CODE (x))
14047 {
14048 case NE:
14049 fputs (s: "neq", stream: file);
14050 break;
14051 case EQ:
14052 fputs (s: "eq", stream: file);
14053 break;
14054 case GE:
14055 case GEU:
14056 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", stream: file);
14057 break;
14058 case GT:
14059 case GTU:
14060 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", stream: file);
14061 break;
14062 case LE:
14063 case LEU:
14064 fputs (s: "le", stream: file);
14065 break;
14066 case LT:
14067 case LTU:
14068 fputs (s: "lt", stream: file);
14069 break;
14070 case UNORDERED:
14071 fputs (s: "unord", stream: file);
14072 break;
14073 case ORDERED:
14074 fputs (s: "ord", stream: file);
14075 break;
14076 case UNEQ:
14077 fputs (s: "ueq", stream: file);
14078 break;
14079 case UNGE:
14080 fputs (s: "nlt", stream: file);
14081 break;
14082 case UNGT:
14083 fputs (s: "nle", stream: file);
14084 break;
14085 case UNLE:
14086 fputs (s: "ule", stream: file);
14087 break;
14088 case UNLT:
14089 fputs (s: "ult", stream: file);
14090 break;
14091 case LTGT:
14092 fputs (s: "une", stream: file);
14093 break;
14094 default:
14095 output_operand_lossage ("operand is not a condition code, "
14096 "invalid operand code 'Y'");
14097 return;
14098 }
14099 return;
14100
14101 case 'D':
14102 /* A little bit of braindamage here. The SSE compare instructions
14103 use completely different names for the comparisons than the
14104 fp conditional moves do. */
14105 switch (GET_CODE (x))
14106 {
14107 case UNEQ:
14108 if (TARGET_AVX)
14109 {
14110 fputs (s: "eq_us", stream: file);
14111 break;
14112 }
14113 /* FALLTHRU */
14114 case EQ:
14115 fputs (s: "eq", stream: file);
14116 break;
14117 case UNLT:
14118 if (TARGET_AVX)
14119 {
14120 fputs (s: "nge", stream: file);
14121 break;
14122 }
14123 /* FALLTHRU */
14124 case LT:
14125 fputs (s: "lt", stream: file);
14126 break;
14127 case UNLE:
14128 if (TARGET_AVX)
14129 {
14130 fputs (s: "ngt", stream: file);
14131 break;
14132 }
14133 /* FALLTHRU */
14134 case LE:
14135 fputs (s: "le", stream: file);
14136 break;
14137 case UNORDERED:
14138 fputs (s: "unord", stream: file);
14139 break;
14140 case LTGT:
14141 if (TARGET_AVX)
14142 {
14143 fputs (s: "neq_oq", stream: file);
14144 break;
14145 }
14146 /* FALLTHRU */
14147 case NE:
14148 fputs (s: "neq", stream: file);
14149 break;
14150 case GE:
14151 if (TARGET_AVX)
14152 {
14153 fputs (s: "ge", stream: file);
14154 break;
14155 }
14156 /* FALLTHRU */
14157 case UNGE:
14158 fputs (s: "nlt", stream: file);
14159 break;
14160 case GT:
14161 if (TARGET_AVX)
14162 {
14163 fputs (s: "gt", stream: file);
14164 break;
14165 }
14166 /* FALLTHRU */
14167 case UNGT:
14168 fputs (s: "nle", stream: file);
14169 break;
14170 case ORDERED:
14171 fputs (s: "ord", stream: file);
14172 break;
14173 default:
14174 output_operand_lossage ("operand is not a condition code, "
14175 "invalid operand code 'D'");
14176 return;
14177 }
14178 return;
14179
14180 case 'F':
14181 case 'f':
14182#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14183 if (ASSEMBLER_DIALECT == ASM_ATT)
14184 putc ('.', file);
14185 gcc_fallthrough ();
14186#endif
14187
14188 case 'C':
14189 case 'c':
14190 if (!COMPARISON_P (x))
14191 {
14192 output_operand_lossage ("operand is not a condition code, "
14193 "invalid operand code '%c'", code);
14194 return;
14195 }
14196 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
14197 code == 'c' || code == 'f',
14198 code == 'F' || code == 'f',
14199 file);
14200 return;
14201
14202 case 'G':
14203 {
14204 int dfv = INTVAL (x);
14205 const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv];
14206 fputs (s: dfv_suffix, stream: file);
14207 }
14208 return;
14209
14210 case 'H':
14211 if (!offsettable_memref_p (x))
14212 {
14213 output_operand_lossage ("operand is not an offsettable memory "
14214 "reference, invalid operand code 'H'");
14215 return;
14216 }
14217 /* It doesn't actually matter what mode we use here, as we're
14218 only going to use this for printing. */
14219 x = adjust_address_nv (x, DImode, 8);
14220 /* Output 'qword ptr' for intel assembler dialect. */
14221 if (ASSEMBLER_DIALECT == ASM_INTEL)
14222 code = 'q';
14223 break;
14224
14225 case 'K':
14226 if (!CONST_INT_P (x))
14227 {
14228 output_operand_lossage ("operand is not an integer, invalid "
14229 "operand code 'K'");
14230 return;
14231 }
14232
14233 if (INTVAL (x) & IX86_HLE_ACQUIRE)
14234#ifdef HAVE_AS_IX86_HLE
14235 fputs (s: "xacquire ", stream: file);
14236#else
14237 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
14238#endif
14239 else if (INTVAL (x) & IX86_HLE_RELEASE)
14240#ifdef HAVE_AS_IX86_HLE
14241 fputs (s: "xrelease ", stream: file);
14242#else
14243 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
14244#endif
14245 /* We do not want to print value of the operand. */
14246 return;
14247
14248 case 'N':
14249 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
14250 fputs (s: "{z}", stream: file);
14251 return;
14252
14253 case 'r':
14254 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
14255 {
14256 output_operand_lossage ("operand is not a specific integer, "
14257 "invalid operand code 'r'");
14258 return;
14259 }
14260
14261 if (ASSEMBLER_DIALECT == ASM_INTEL)
14262 fputs (s: ", ", stream: file);
14263
14264 fputs (s: "{sae}", stream: file);
14265
14266 if (ASSEMBLER_DIALECT == ASM_ATT)
14267 fputs (s: ", ", stream: file);
14268
14269 return;
14270
14271 case 'R':
14272 if (!CONST_INT_P (x))
14273 {
14274 output_operand_lossage ("operand is not an integer, invalid "
14275 "operand code 'R'");
14276 return;
14277 }
14278
14279 if (ASSEMBLER_DIALECT == ASM_INTEL)
14280 fputs (s: ", ", stream: file);
14281
14282 switch (INTVAL (x))
14283 {
14284 case ROUND_NEAREST_INT | ROUND_SAE:
14285 fputs (s: "{rn-sae}", stream: file);
14286 break;
14287 case ROUND_NEG_INF | ROUND_SAE:
14288 fputs (s: "{rd-sae}", stream: file);
14289 break;
14290 case ROUND_POS_INF | ROUND_SAE:
14291 fputs (s: "{ru-sae}", stream: file);
14292 break;
14293 case ROUND_ZERO | ROUND_SAE:
14294 fputs (s: "{rz-sae}", stream: file);
14295 break;
14296 default:
14297 output_operand_lossage ("operand is not a specific integer, "
14298 "invalid operand code 'R'");
14299 }
14300
14301 if (ASSEMBLER_DIALECT == ASM_ATT)
14302 fputs (s: ", ", stream: file);
14303
14304 return;
14305
14306 case 'v':
14307 if (MEM_P (x))
14308 {
14309 switch (MEM_ADDR_SPACE (x))
14310 {
14311 case ADDR_SPACE_GENERIC:
14312 break;
14313 case ADDR_SPACE_SEG_FS:
14314 fputs (s: "fs ", stream: file);
14315 break;
14316 case ADDR_SPACE_SEG_GS:
14317 fputs (s: "gs ", stream: file);
14318 break;
14319 default:
14320 gcc_unreachable ();
14321 }
14322 }
14323 else
14324 output_operand_lossage ("operand is not a memory reference, "
14325 "invalid operand code 'v'");
14326 return;
14327
14328 case '*':
14329 if (ASSEMBLER_DIALECT == ASM_ATT)
14330 putc (c: '*', stream: file);
14331 return;
14332
14333 case '&':
14334 {
14335 const char *name = get_some_local_dynamic_name ();
14336 if (name == NULL)
14337 output_operand_lossage ("'%%&' used without any "
14338 "local dynamic TLS references");
14339 else
14340 assemble_name (file, name);
14341 return;
14342 }
14343
14344 case '+':
14345 {
14346 rtx x;
14347
14348 if (!optimize
14349 || optimize_function_for_size_p (cfun)
14350 || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
14351 && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
14352 return;
14353
14354 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14355 if (x)
14356 {
14357 int pred_val = profile_probability::from_reg_br_prob_note
14358 (XINT (x, 0)).to_reg_br_prob_base ();
14359
14360 bool taken = pred_val > REG_BR_PROB_BASE / 2;
14361 /* We use 3e (DS) prefix for taken branches and
14362 2e (CS) prefix for not taken branches. */
14363 if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
14364 fputs (s: "ds ; ", stream: file);
14365 else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
14366 fputs (s: "cs ; ", stream: file);
14367 }
14368 return;
14369 }
14370
14371 case ';':
14372#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14373 putc (';', file);
14374#endif
14375 return;
14376
14377 case '~':
14378 putc (TARGET_AVX2 ? 'i' : 'f', stream: file);
14379 return;
14380
14381 case 'M':
14382 if (TARGET_X32)
14383 {
14384 /* NB: 32-bit indices in a VSIB address are sign-extended
14385 to 64 bits. In x32, the 32-bit address 0xf7fa3010 would be
14386 sign-extended to 0xfffffffff7fa3010, which is an invalid
14387 address. Add the addr32 prefix if there is neither a base
14388 register nor a symbol. */
14389 bool ok;
14390 struct ix86_address parts;
14391 ok = ix86_decompose_address (x, &parts);
14392 gcc_assert (ok && parts.index == NULL_RTX);
14393 if (parts.base == NULL_RTX
14394 && (parts.disp == NULL_RTX
14395 || !symbolic_operand (parts.disp,
14396 GET_MODE (parts.disp))))
14397 fputs (s: "addr32 ", stream: file);
14398 }
14399 return;
14400
14401 case '^':
14402 if (Pmode != word_mode)
14403 fputs (s: "addr32 ", stream: file);
14404 return;
14405
14406 case '!':
14407 if (ix86_notrack_prefixed_insn_p (current_output_insn))
14408 fputs (s: "notrack ", stream: file);
14409 return;
14410
14411 default:
14412 output_operand_lossage ("invalid operand code '%c'", code);
14413 }
14414 }
14415
14416 if (REG_P (x))
14417 print_reg (x, code, file);
14418
14419 else if (MEM_P (x))
14420 {
14421 rtx addr = XEXP (x, 0);
14422
14423 /* No `byte ptr' prefix for call instructions ... */
14424 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
14425 {
14426 machine_mode mode = GET_MODE (x);
14427 const char *size;
14428
14429 /* Check for explicit size override codes. */
14430 if (code == 'b')
14431 size = "BYTE";
14432 else if (code == 'w')
14433 size = "WORD";
14434 else if (code == 'k')
14435 size = "DWORD";
14436 else if (code == 'q')
14437 size = "QWORD";
14438 else if (code == 'x')
14439 size = "XMMWORD";
14440 else if (code == 't')
14441 size = "YMMWORD";
14442 else if (code == 'g')
14443 size = "ZMMWORD";
14444 else if (mode == BLKmode)
14445 /* ... or BLKmode operands, when not overridden. */
14446 size = NULL;
14447 else
14448 switch (GET_MODE_SIZE (mode))
14449 {
14450 case 1: size = "BYTE"; break;
14451 case 2: size = "WORD"; break;
14452 case 4: size = "DWORD"; break;
14453 case 8: size = "QWORD"; break;
14454 case 12: size = "TBYTE"; break;
14455 case 16:
14456 if (mode == XFmode)
14457 size = "TBYTE";
14458 else
14459 size = "XMMWORD";
14460 break;
14461 case 32: size = "YMMWORD"; break;
14462 case 64: size = "ZMMWORD"; break;
14463 default:
14464 gcc_unreachable ();
14465 }
14466 if (size)
14467 {
14468 fputs (s: size, stream: file);
14469 fputs (s: " PTR ", stream: file);
14470 }
14471 }
14472
14473 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14474 output_operand_lossage ("invalid constraints for operand");
14475 else
14476 ix86_print_operand_address_as
14477 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
14478 }
14479
14480 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
14481 {
14482 long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
14483 REAL_MODE_FORMAT (HFmode));
14484 if (ASSEMBLER_DIALECT == ASM_ATT)
14485 putc (c: '$', stream: file);
14486 fprintf (stream: file, format: "0x%04x", (unsigned int) l);
14487 }
14488
14489 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
14490 {
14491 long l;
14492
14493 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14494
14495 if (ASSEMBLER_DIALECT == ASM_ATT)
14496 putc (c: '$', stream: file);
14497 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14498 if (code == 'q')
14499 fprintf (stream: file, format: "0x%08" HOST_LONG_LONG_FORMAT "x",
14500 (unsigned long long) (int) l);
14501 else
14502 fprintf (stream: file, format: "0x%08x", (unsigned int) l);
14503 }
14504
14505 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
14506 {
14507 long l[2];
14508
14509 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14510
14511 if (ASSEMBLER_DIALECT == ASM_ATT)
14512 putc (c: '$', stream: file);
14513 fprintf (stream: file, format: "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14514 }
14515
14516 /* These float cases don't actually occur as immediate operands. */
14517 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
14518 {
14519 char dstr[30];
14520
14521 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14522 fputs (s: dstr, stream: file);
14523 }
14524
14525 /* Print bcst_mem_operand. */
14526 else if (GET_CODE (x) == VEC_DUPLICATE)
14527 {
14528 machine_mode vmode = GET_MODE (x);
14529 /* Must be bcst_memory_operand. */
14530 gcc_assert (bcst_mem_operand (x, vmode));
14531
14532 rtx mem = XEXP (x,0);
14533 ix86_print_operand (file, mem, 0);
14534
14535 switch (vmode)
14536 {
14537 case E_V2DImode:
14538 case E_V2DFmode:
14539 fputs (s: "{1to2}", stream: file);
14540 break;
14541 case E_V4SImode:
14542 case E_V4SFmode:
14543 case E_V4DImode:
14544 case E_V4DFmode:
14545 fputs (s: "{1to4}", stream: file);
14546 break;
14547 case E_V8SImode:
14548 case E_V8SFmode:
14549 case E_V8DFmode:
14550 case E_V8DImode:
14551 case E_V8HFmode:
14552 fputs (s: "{1to8}", stream: file);
14553 break;
14554 case E_V16SFmode:
14555 case E_V16SImode:
14556 case E_V16HFmode:
14557 fputs (s: "{1to16}", stream: file);
14558 break;
14559 case E_V32HFmode:
14560 fputs (s: "{1to32}", stream: file);
14561 break;
14562 default:
14563 gcc_unreachable ();
14564 }
14565 }
14566
14567 else
14568 {
14569 /* We have patterns that allow zero sets of memory, for instance.
14570 In 64-bit mode, we should probably support all 8-byte vectors,
14571 since we can in fact encode that into an immediate. */
14572 if (GET_CODE (x) == CONST_VECTOR)
14573 {
14574 if (x != CONST0_RTX (GET_MODE (x)))
14575 output_operand_lossage ("invalid vector immediate");
14576 x = const0_rtx;
14577 }
14578
14579 if (code == 'P')
14580 {
14581 if (ix86_force_load_from_GOT_p (x, true))
14582 {
14583 /* For inline assembly statement, load function address
14584 from GOT with 'P' operand modifier to avoid PLT. */
14585 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14586 (TARGET_64BIT
14587 ? UNSPEC_GOTPCREL
14588 : UNSPEC_GOT));
14589 x = gen_rtx_CONST (Pmode, x);
14590 x = gen_const_mem (Pmode, x);
14591 ix86_print_operand (file, x, 'A');
14592 return;
14593 }
14594 }
14595 else if (code != 'p')
14596 {
14597 if (CONST_INT_P (x))
14598 {
14599 if (ASSEMBLER_DIALECT == ASM_ATT)
14600 putc (c: '$', stream: file);
14601 }
14602 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14603 || GET_CODE (x) == LABEL_REF)
14604 {
14605 if (ASSEMBLER_DIALECT == ASM_ATT)
14606 putc (c: '$', stream: file);
14607 else
14608 fputs (s: "OFFSET FLAT:", stream: file);
14609 }
14610 }
14611 if (CONST_INT_P (x))
14612 fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14613 else if (flag_pic || MACHOPIC_INDIRECT)
14614 output_pic_addr_const (file, x, code);
14615 else
14616 output_addr_const (file, x);
14617 }
14618}
14619
14620static bool
14621ix86_print_operand_punct_valid_p (unsigned char code)
14622{
14623 return (code == '*' || code == '+' || code == '&' || code == ';'
14624 || code == '~' || code == '^' || code == '!');
14625}
14626
14627/* Print a memory operand whose address is ADDR. */
14628
14629static void
14630ix86_print_operand_address_as (FILE *file, rtx addr,
14631 addr_space_t as, bool raw)
14632{
14633 struct ix86_address parts;
14634 rtx base, index, disp;
14635 int scale;
14636 int ok;
14637 bool vsib = false;
14638 int code = 0;
14639
14640 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14641 {
14642 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14643 gcc_assert (parts.index == NULL_RTX);
14644 parts.index = XVECEXP (addr, 0, 1);
14645 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14646 addr = XVECEXP (addr, 0, 0);
14647 vsib = true;
14648 }
14649 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
14650 {
14651 gcc_assert (TARGET_64BIT);
14652 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14653 code = 'q';
14654 }
14655 else
14656 ok = ix86_decompose_address (addr, &parts);
14657
14658 gcc_assert (ok);
14659
14660 base = parts.base;
14661 index = parts.index;
14662 disp = parts.disp;
14663 scale = parts.scale;
14664
14665 if (ADDR_SPACE_GENERIC_P (as))
14666 as = parts.seg;
14667 else
14668 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
14669
14670 if (!ADDR_SPACE_GENERIC_P (as) && !raw)
14671 {
14672 if (ASSEMBLER_DIALECT == ASM_ATT)
14673 putc (c: '%', stream: file);
14674
14675 switch (as)
14676 {
14677 case ADDR_SPACE_SEG_FS:
14678 fputs (s: "fs:", stream: file);
14679 break;
14680 case ADDR_SPACE_SEG_GS:
14681 fputs (s: "gs:", stream: file);
14682 break;
14683 default:
14684 gcc_unreachable ();
14685 }
14686 }
14687
14688 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14689 if (TARGET_64BIT && !base && !index && !raw)
14690 {
14691 rtx symbol = disp;
14692
14693 if (GET_CODE (disp) == CONST
14694 && GET_CODE (XEXP (disp, 0)) == PLUS
14695 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14696 symbol = XEXP (XEXP (disp, 0), 0);
14697
14698 if (GET_CODE (symbol) == LABEL_REF
14699 || (GET_CODE (symbol) == SYMBOL_REF
14700 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14701 base = pc_rtx;
14702 }
14703
14704 if (!base && !index)
14705 {
14706 /* Displacement only requires special attention. */
14707 if (CONST_INT_P (disp))
14708 {
14709 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
14710 fputs (s: "ds:", stream: file);
14711 fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14712 }
14713 /* Load the external function address via the GOT slot to avoid PLT. */
14714 else if (GET_CODE (disp) == CONST
14715 && GET_CODE (XEXP (disp, 0)) == UNSPEC
14716 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
14717 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
14718 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
14719 output_pic_addr_const (file, x: disp, code: 0);
14720 else if (flag_pic)
14721 output_pic_addr_const (file, x: disp, code: 0);
14722 else
14723 output_addr_const (file, disp);
14724 }
14725 else
14726 {
14727 /* Print SImode register names to force addr32 prefix. */
14728 if (SImode_address_operand (addr, VOIDmode))
14729 {
14730 if (flag_checking)
14731 {
14732 gcc_assert (TARGET_64BIT);
14733 switch (GET_CODE (addr))
14734 {
14735 case SUBREG:
14736 gcc_assert (GET_MODE (addr) == SImode);
14737 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
14738 break;
14739 case ZERO_EXTEND:
14740 case AND:
14741 gcc_assert (GET_MODE (addr) == DImode);
14742 break;
14743 default:
14744 gcc_unreachable ();
14745 }
14746 }
14747 gcc_assert (!code);
14748 code = 'k';
14749 }
14750 else if (code == 0
14751 && TARGET_X32
14752 && disp
14753 && CONST_INT_P (disp)
14754 && INTVAL (disp) < -16*1024*1024)
14755 {
14756 /* X32 runs in 64-bit mode, where displacement, DISP, in
14757 address DISP(%r64), is encoded as 32-bit immediate sign-
14758 extended from 32-bit to 64-bit. For -0x40000300(%r64),
14759 address is %r64 + 0xffffffffbffffd00. When %r64 <
14760 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
14761 which is invalid for x32. The correct address is %r64
14762 - 0x40000300 == 0xf7ffdd64. To properly encode
14763 -0x40000300(%r64) for x32, we zero-extend negative
14764 displacement by forcing addr32 prefix which truncates
14765 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
14766 zero-extend all negative displacements, including -1(%rsp).
14767 However, for small negative displacements, sign-extension
14768 won't cause overflow. We only zero-extend negative
14769 displacements if they are < -16*1024*1024, a limit also used
14770 to check legitimate address displacements for PIC. */
14771 code = 'k';
14772 }
14773
14774 /* Since the upper 32 bits of RSP are always zero for x32,
14775 we can encode %esp as %rsp to avoid 0x67 prefix if
14776 there is no index register. */
14777 if (TARGET_X32 && Pmode == SImode
14778 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
14779 code = 'q';
14780
14781 if (ASSEMBLER_DIALECT == ASM_ATT)
14782 {
14783 if (disp)
14784 {
14785 if (flag_pic)
14786 output_pic_addr_const (file, x: disp, code: 0);
14787 else if (GET_CODE (disp) == LABEL_REF)
14788 output_asm_label (disp);
14789 else
14790 output_addr_const (file, disp);
14791 }
14792
14793 putc (c: '(', stream: file);
14794 if (base)
14795 print_reg (x: base, code, file);
14796 if (index)
14797 {
14798 putc (c: ',', stream: file);
14799 print_reg (x: index, code: vsib ? 0 : code, file);
14800 if (scale != 1 || vsib)
14801 fprintf (stream: file, format: ",%d", scale);
14802 }
14803 putc (c: ')', stream: file);
14804 }
14805 else
14806 {
14807 rtx offset = NULL_RTX;
14808
14809 if (disp)
14810 {
14811 /* Pull out the offset of a symbol; print any symbol itself. */
14812 if (GET_CODE (disp) == CONST
14813 && GET_CODE (XEXP (disp, 0)) == PLUS
14814 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14815 {
14816 offset = XEXP (XEXP (disp, 0), 1);
14817 disp = gen_rtx_CONST (VOIDmode,
14818 XEXP (XEXP (disp, 0), 0));
14819 }
14820
14821 if (flag_pic)
14822 output_pic_addr_const (file, x: disp, code: 0);
14823 else if (GET_CODE (disp) == LABEL_REF)
14824 output_asm_label (disp);
14825 else if (CONST_INT_P (disp))
14826 offset = disp;
14827 else
14828 output_addr_const (file, disp);
14829 }
14830
14831 putc (c: '[', stream: file);
14832 if (base)
14833 {
14834 print_reg (x: base, code, file);
14835 if (offset)
14836 {
14837 if (INTVAL (offset) >= 0)
14838 putc (c: '+', stream: file);
14839 fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14840 }
14841 }
14842 else if (offset)
14843 fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14844 else
14845 putc (c: '0', stream: file);
14846
14847 if (index)
14848 {
14849 putc (c: '+', stream: file);
14850 print_reg (x: index, code: vsib ? 0 : code, file);
14851 if (scale != 1 || vsib)
14852 fprintf (stream: file, format: "*%d", scale);
14853 }
14854 putc (c: ']', stream: file);
14855 }
14856 }
14857}
14858
14859static void
14860ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
14861{
14862 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14863 output_operand_lossage ("invalid constraints for operand");
14864 else
14865 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, raw: false);
14866}
14867
14868/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
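/* Each recognized UNSPEC is printed as its operand followed by the
   matching assembler relocation suffix; e.g. an UNSPEC_GOTOFF wrapping
   the symbol "foo" comes out as "foo@gotoff".  */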
14869
14870static bool
14871i386_asm_output_addr_const_extra (FILE *file, rtx x)
14872{
14873 rtx op;
14874
14875 if (GET_CODE (x) != UNSPEC)
14876 return false;
14877
14878 op = XVECEXP (x, 0, 0);
14879 switch (XINT (x, 1))
14880 {
14881 case UNSPEC_GOTOFF:
14882 output_addr_const (file, op);
14883 fputs (s: "@gotoff", stream: file);
14884 break;
14885 case UNSPEC_GOTTPOFF:
14886 output_addr_const (file, op);
14887 /* FIXME: This might be @TPOFF in Sun ld. */
14888 fputs (s: "@gottpoff", stream: file);
14889 break;
14890 case UNSPEC_TPOFF:
14891 output_addr_const (file, op);
14892 fputs (s: "@tpoff", stream: file);
14893 break;
14894 case UNSPEC_NTPOFF:
14895 output_addr_const (file, op);
14896 if (TARGET_64BIT)
14897 fputs (s: "@tpoff", stream: file);
14898 else
14899 fputs (s: "@ntpoff", stream: file);
14900 break;
14901 case UNSPEC_DTPOFF:
14902 output_addr_const (file, op);
14903 fputs (s: "@dtpoff", stream: file);
14904 break;
14905 case UNSPEC_GOTNTPOFF:
14906 output_addr_const (file, op);
14907 if (TARGET_64BIT)
14908 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14909 "@gottpoff(%rip)" : "@gottpoff[rip]", stream: file);
14910 else
14911 fputs (s: "@gotntpoff", stream: file);
14912 break;
14913 case UNSPEC_INDNTPOFF:
14914 output_addr_const (file, op);
14915 fputs (s: "@indntpoff", stream: file);
14916 break;
14917 case UNSPEC_SECREL32:
14918 output_addr_const (file, op);
14919 fputs (s: "@secrel32", stream: file);
14920 break;
14921#if TARGET_MACHO
14922 case UNSPEC_MACHOPIC_OFFSET:
14923 output_addr_const (file, op);
14924 putc ('-', file);
14925 machopic_output_function_base_name (file);
14926 break;
14927#endif
14928
14929 default:
14930 return false;
14931 }
14932
14933 return true;
14934}
14935
14936
14937/* Output code to perform a 387 binary operation in INSN, one of PLUS,
14938 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14939 is the expression of the binary operation. The output may either be
14940 emitted here, or returned to the caller, like all output_* functions.
14941
14942 There is no guarantee that the operands are the same mode, as they
14943 might be within FLOAT or FLOAT_EXTEND expressions. */
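/* As an illustration, an SFmode PLUS with all-SSE operands under AVX
   produces the template "%vaddss\t{%2, %1, %0|%0, %1, %2}", which prints
   roughly as "vaddss %xmm2, %xmm1, %xmm0" in AT&T syntax.  */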
14944
14945#ifndef SYSV386_COMPAT
14946/* Set to 1 for compatibility with brain-damaged assemblers. No-one
14947 wants to fix the assemblers because that causes incompatibility
14948 with gcc. No-one wants to fix gcc because that causes
14949 incompatibility with assemblers... You can use the option of
14950 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14951#define SYSV386_COMPAT 1
14952#endif
14953
14954const char *
14955output_387_binary_op (rtx_insn *insn, rtx *operands)
14956{
14957 static char buf[40];
14958 const char *p;
14959 bool is_sse
14960 = (SSE_REG_P (operands[0])
14961 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
14962
14963 if (is_sse)
14964 p = "%v";
14965 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14966 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14967 p = "fi";
14968 else
14969 p = "f";
14970
14971 strcpy (dest: buf, src: p);
14972
14973 switch (GET_CODE (operands[3]))
14974 {
14975 case PLUS:
14976 p = "add"; break;
14977 case MINUS:
14978 p = "sub"; break;
14979 case MULT:
14980 p = "mul"; break;
14981 case DIV:
14982 p = "div"; break;
14983 default:
14984 gcc_unreachable ();
14985 }
14986
14987 strcat (dest: buf, src: p);
14988
14989 if (is_sse)
14990 {
14991 p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
14992 strcat (dest: buf, src: p);
14993
14994 if (TARGET_AVX)
14995 p = "\t{%2, %1, %0|%0, %1, %2}";
14996 else
14997 p = "\t{%2, %0|%0, %2}";
14998
14999 strcat (dest: buf, src: p);
15000 return buf;
15001 }
15002
15003 /* Even if we do not want to check the inputs, this documents the input
15004 constraints, which helps in understanding the following code. */
15005 if (flag_checking)
15006 {
15007 if (STACK_REG_P (operands[0])
15008 && ((REG_P (operands[1])
15009 && REGNO (operands[0]) == REGNO (operands[1])
15010 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15011 || (REG_P (operands[2])
15012 && REGNO (operands[0]) == REGNO (operands[2])
15013 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15014 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15015 ; /* ok */
15016 else
15017 gcc_unreachable ();
15018 }
15019
15020 switch (GET_CODE (operands[3]))
15021 {
15022 case MULT:
15023 case PLUS:
15024 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15025 std::swap (a&: operands[1], b&: operands[2]);
15026
15027 /* We know operands[0] == operands[1]. */
15028
15029 if (MEM_P (operands[2]))
15030 {
15031 p = "%Z2\t%2";
15032 break;
15033 }
15034
15035 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15036 {
15037 if (STACK_TOP_P (operands[0]))
15038 /* How is it that we are storing to a dead operand[2]?
15039 Well, presumably operands[1] is dead too. We can't
15040 store the result to st(0) as st(0) gets popped on this
15041 instruction. Instead store to operands[2] (which I
15042 think has to be st(1)). st(1) will be popped later.
15043 gcc <= 2.8.1 didn't have this check and generated
15044 assembly code that the Unixware assembler rejected. */
15045 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15046 else
15047 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15048 break;
15049 }
15050
15051 if (STACK_TOP_P (operands[0]))
15052 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15053 else
15054 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15055 break;
15056
15057 case MINUS:
15058 case DIV:
15059 if (MEM_P (operands[1]))
15060 {
15061 p = "r%Z1\t%1";
15062 break;
15063 }
15064
15065 if (MEM_P (operands[2]))
15066 {
15067 p = "%Z2\t%2";
15068 break;
15069 }
15070
15071 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15072 {
15073#if SYSV386_COMPAT
15074 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15075 derived assemblers, confusingly reverse the direction of
15076 the operation for fsub{r} and fdiv{r} when the
15077 destination register is not st(0). The Intel assembler
15078 doesn't have this brain damage. Read !SYSV386_COMPAT to
15079 figure out what the hardware really does. */
15080 if (STACK_TOP_P (operands[0]))
15081 p = "{p\t%0, %2|rp\t%2, %0}";
15082 else
15083 p = "{rp\t%2, %0|p\t%0, %2}";
15084#else
15085 if (STACK_TOP_P (operands[0]))
15086 /* As above for fmul/fadd, we can't store to st(0). */
15087 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15088 else
15089 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15090#endif
15091 break;
15092 }
15093
15094 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15095 {
15096#if SYSV386_COMPAT
15097 if (STACK_TOP_P (operands[0]))
15098 p = "{rp\t%0, %1|p\t%1, %0}";
15099 else
15100 p = "{p\t%1, %0|rp\t%0, %1}";
15101#else
15102 if (STACK_TOP_P (operands[0]))
15103 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15104 else
15105 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15106#endif
15107 break;
15108 }
15109
15110 if (STACK_TOP_P (operands[0]))
15111 {
15112 if (STACK_TOP_P (operands[1]))
15113 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15114 else
15115 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15116 break;
15117 }
15118 else if (STACK_TOP_P (operands[1]))
15119 {
15120#if SYSV386_COMPAT
15121 p = "{\t%1, %0|r\t%0, %1}";
15122#else
15123 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15124#endif
15125 }
15126 else
15127 {
15128#if SYSV386_COMPAT
15129 p = "{r\t%2, %0|\t%0, %2}";
15130#else
15131 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15132#endif
15133 }
15134 break;
15135
15136 default:
15137 gcc_unreachable ();
15138 }
15139
15140 strcat (dest: buf, src: p);
15141 return buf;
15142}
15143
15144/* Return needed mode for entity in optimize_mode_switching pass. */
15145
15146static int
15147ix86_dirflag_mode_needed (rtx_insn *insn)
15148{
15149 if (CALL_P (insn))
15150 {
15151 if (cfun->machine->func_type == TYPE_NORMAL)
15152 return X86_DIRFLAG_ANY;
15153 else
15154 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
15155 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
15156 }
15157
15158 if (recog_memoized (insn) < 0)
15159 return X86_DIRFLAG_ANY;
15160
15161 if (get_attr_type (insn) == TYPE_STR)
15162 {
15163 /* Emit cld instruction if stringops are used in the function. */
15164 if (cfun->machine->func_type == TYPE_NORMAL)
15165 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
15166 else
15167 return X86_DIRFLAG_RESET;
15168 }
15169
15170 return X86_DIRFLAG_ANY;
15171}
15172
15173/* Check if a 256bit or 512bit AVX register is referenced inside EXP. */
15174
15175static bool
15176ix86_check_avx_upper_register (const_rtx exp)
15177{
15178 /* construct_container may return a parallel with an expr_list
15179 which contains the real reg and mode. */
15180 subrtx_iterator::array_type array;
15181 FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
15182 {
15183 const_rtx x = *iter;
15184 if (SSE_REG_P (x)
15185 && !EXT_REX_SSE_REG_P (x)
15186 && GET_MODE_BITSIZE (GET_MODE (x)) > 128)
15187 return true;
15188 }
15189
15190 return false;
15191}
15192
15193/* Check if a 256bit or 512bit AVX register is referenced in stores. */
15194
15195static void
15196ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
15197{
15198 if (SSE_REG_P (dest)
15199 && !EXT_REX_SSE_REG_P (dest)
15200 && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
15201 {
15202 bool *used = (bool *) data;
15203 *used = true;
15204 }
15205}
15206
15207/* Return needed mode for entity in optimize_mode_switching pass. */
15208
15209static int
15210ix86_avx_u128_mode_needed (rtx_insn *insn)
15211{
15212 if (DEBUG_INSN_P (insn))
15213 return AVX_U128_ANY;
15214
15215 if (CALL_P (insn))
15216 {
15217 rtx link;
15218
15219 /* Needed mode is set to AVX_U128_CLEAN if there are
15220 no 256bit or 512bit modes used in function arguments. */
15221 for (link = CALL_INSN_FUNCTION_USAGE (insn);
15222 link;
15223 link = XEXP (link, 1))
15224 {
15225 if (GET_CODE (XEXP (link, 0)) == USE)
15226 {
15227 rtx arg = XEXP (XEXP (link, 0), 0);
15228
15229 if (ix86_check_avx_upper_register (exp: arg))
15230 return AVX_U128_DIRTY;
15231 }
15232 }
15233
15234 /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
15235 or 512bit registers used in the function return register. */
15236 bool avx_upper_reg_found = false;
15237 note_stores (insn, ix86_check_avx_upper_stores,
15238 &avx_upper_reg_found);
15239 if (avx_upper_reg_found)
15240 return AVX_U128_DIRTY;
15241
15242 /* If the function is known to preserve some SSE registers,
15243 RA and previous passes can legitimately rely on that for
15244 modes wider than 256 bits. It's only safe to issue a
15245 vzeroupper if all SSE registers are clobbered. */
15246 const function_abi &abi = insn_callee_abi (insn);
15247 if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
15248 /* Should be safe to issue a vzeroupper before sibling_call_p.
15249 Also there is no mode_exit for sibling_call, so a vzeroupper
15250 could otherwise be missing for it. */
15251 || !(SIBLING_CALL_P (insn)
15252 || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
15253 y: abi.mode_clobbers (V4DImode))))
15254 return AVX_U128_ANY;
15255
15256 return AVX_U128_CLEAN;
15257 }
15258
15259 rtx set = single_set (insn);
15260 if (set)
15261 {
15262 rtx dest = SET_DEST (set);
15263 rtx src = SET_SRC (set);
15264 if (SSE_REG_P (dest)
15265 && !EXT_REX_SSE_REG_P (dest)
15266 && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
15267 {
15268 /* This is a YMM/ZMM load. Return AVX_U128_DIRTY if the
15269 source isn't zero. */
15270 if (standard_sse_constant_p (x: src, GET_MODE (dest)) != 1)
15271 return AVX_U128_DIRTY;
15272 else
15273 return AVX_U128_ANY;
15274 }
15275 else
15276 {
15277 if (ix86_check_avx_upper_register (exp: src))
15278 return AVX_U128_DIRTY;
15279 }
15280
15281 /* This isn't a YMM/ZMM load or store. */
15282 return AVX_U128_ANY;
15283 }
15284
15285 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
15286 Hardware changes state only when a 256bit register is written to,
15287 but we need to prevent the compiler from moving the optimal insertion
15288 point above an eventual read from a 256bit or 512bit register. */
15289 if (ix86_check_avx_upper_register (exp: PATTERN (insn)))
15290 return AVX_U128_DIRTY;
15291
15292 return AVX_U128_ANY;
15293}
15294
15295/* Return mode that i387 must be switched into
15296 prior to the execution of insn. */
15297
15298static int
15299ix86_i387_mode_needed (int entity, rtx_insn *insn)
15300{
15301 enum attr_i387_cw mode;
15302
15303 /* The mode UNINITIALIZED is used to store the control word after a
15304 function call or ASM pattern. The mode ANY specifies that the function
15305 has no requirements on the control word and makes no changes in the
15306 bits we are interested in. */
15307
15308 if (CALL_P (insn)
15309 || (NONJUMP_INSN_P (insn)
15310 && (asm_noperands (PATTERN (insn)) >= 0
15311 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15312 return I387_CW_UNINITIALIZED;
15313
15314 if (recog_memoized (insn) < 0)
15315 return I387_CW_ANY;
15316
15317 mode = get_attr_i387_cw (insn);
15318
15319 switch (entity)
15320 {
15321 case I387_ROUNDEVEN:
15322 if (mode == I387_CW_ROUNDEVEN)
15323 return mode;
15324 break;
15325
15326 case I387_TRUNC:
15327 if (mode == I387_CW_TRUNC)
15328 return mode;
15329 break;
15330
15331 case I387_FLOOR:
15332 if (mode == I387_CW_FLOOR)
15333 return mode;
15334 break;
15335
15336 case I387_CEIL:
15337 if (mode == I387_CW_CEIL)
15338 return mode;
15339 break;
15340
15341 default:
15342 gcc_unreachable ();
15343 }
15344
15345 return I387_CW_ANY;
15346}
15347
15348/* Return mode that entity must be switched into
15349 prior to the execution of insn. */
15350
15351static int
15352ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
15353{
15354 switch (entity)
15355 {
15356 case X86_DIRFLAG:
15357 return ix86_dirflag_mode_needed (insn);
15358 case AVX_U128:
15359 return ix86_avx_u128_mode_needed (insn);
15360 case I387_ROUNDEVEN:
15361 case I387_TRUNC:
15362 case I387_FLOOR:
15363 case I387_CEIL:
15364 return ix86_i387_mode_needed (entity, insn);
15365 default:
15366 gcc_unreachable ();
15367 }
15368 return 0;
15369}
15370
15371/* Calculate the mode of the upper 128 bits of AVX registers after the insn. */
15372
15373static int
15374ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
15375{
15376 rtx pat = PATTERN (insn);
15377
15378 if (vzeroupper_pattern (pat, VOIDmode)
15379 || vzeroall_pattern (pat, VOIDmode))
15380 return AVX_U128_CLEAN;
15381
15382 /* We know that the state is clean after a CALL insn if there are no
15383 256bit or 512bit registers used in the function return register. */
15384 if (CALL_P (insn))
15385 {
15386 bool avx_upper_reg_found = false;
15387 note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
15388
15389 if (avx_upper_reg_found)
15390 return AVX_U128_DIRTY;
15391
15392 /* If the function doesn't clobber any SSE registers, or clobbers only
15393 their 128-bit parts, then vzeroupper isn't issued before the function
15394 exit and the status is ANY, not CLEAN, after the function. */
15395 const function_abi &abi = insn_callee_abi (insn);
15396 if (!(SIBLING_CALL_P (insn)
15397 || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
15398 y: abi.mode_clobbers (V4DImode))))
15399 return AVX_U128_ANY;
15400
15401 return AVX_U128_CLEAN;
15402 }
15403
15404 /* Otherwise, return the current mode. Remember that if the insn
15405 references AVX 256bit or 512bit registers, the mode was already
15406 changed to DIRTY from MODE_NEEDED. */
15407 return mode;
15408}
15409
15410/* Return the mode that an insn results in. */
15411
15412static int
15413ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
15414{
15415 switch (entity)
15416 {
15417 case X86_DIRFLAG:
15418 return mode;
15419 case AVX_U128:
15420 return ix86_avx_u128_mode_after (mode, insn);
15421 case I387_ROUNDEVEN:
15422 case I387_TRUNC:
15423 case I387_FLOOR:
15424 case I387_CEIL:
15425 return mode;
15426 default:
15427 gcc_unreachable ();
15428 }
15429}
15430
15431static int
15432ix86_dirflag_mode_entry (void)
15433{
15434 /* For TARGET_CLD or in the interrupt handler we can't assume
15435 direction flag state at function entry. */
15436 if (TARGET_CLD
15437 || cfun->machine->func_type != TYPE_NORMAL)
15438 return X86_DIRFLAG_ANY;
15439
15440 return X86_DIRFLAG_RESET;
15441}
15442
15443static int
15444ix86_avx_u128_mode_entry (void)
15445{
15446 tree arg;
15447
15448 /* Entry mode is set to AVX_U128_DIRTY if there are
15449 256bit or 512bit modes used in function arguments. */
15450 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
15451 arg = TREE_CHAIN (arg))
15452 {
15453 rtx incoming = DECL_INCOMING_RTL (arg);
15454
15455 if (incoming && ix86_check_avx_upper_register (exp: incoming))
15456 return AVX_U128_DIRTY;
15457 }
15458
15459 return AVX_U128_CLEAN;
15460}
15461
15462/* Return a mode that ENTITY is assumed to be
15463 switched to at function entry. */
15464
15465static int
15466ix86_mode_entry (int entity)
15467{
15468 switch (entity)
15469 {
15470 case X86_DIRFLAG:
15471 return ix86_dirflag_mode_entry ();
15472 case AVX_U128:
15473 return ix86_avx_u128_mode_entry ();
15474 case I387_ROUNDEVEN:
15475 case I387_TRUNC:
15476 case I387_FLOOR:
15477 case I387_CEIL:
15478 return I387_CW_ANY;
15479 default:
15480 gcc_unreachable ();
15481 }
15482}
15483
15484static int
15485ix86_avx_u128_mode_exit (void)
15486{
15487 rtx reg = crtl->return_rtx;
15488
15489 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
15490 or 512 bit modes used in the function return register. */
15491 if (reg && ix86_check_avx_upper_register (exp: reg))
15492 return AVX_U128_DIRTY;
15493
15494 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
15495 modes used in function arguments, otherwise return AVX_U128_CLEAN.
15496 */
15497 return ix86_avx_u128_mode_entry ();
15498}
15499
15500/* Return a mode that ENTITY is assumed to be
15501 switched to at function exit. */
15502
15503static int
15504ix86_mode_exit (int entity)
15505{
15506 switch (entity)
15507 {
15508 case X86_DIRFLAG:
15509 return X86_DIRFLAG_ANY;
15510 case AVX_U128:
15511 return ix86_avx_u128_mode_exit ();
15512 case I387_ROUNDEVEN:
15513 case I387_TRUNC:
15514 case I387_FLOOR:
15515 case I387_CEIL:
15516 return I387_CW_ANY;
15517 default:
15518 gcc_unreachable ();
15519 }
15520}
15521
15522static int
15523ix86_mode_priority (int, int n)
15524{
15525 return n;
15526}
15527
15528/* Emit code to initialize the control word copies used by the trunc?f?i
15529 and rounding patterns. The current control word is saved to a stack slot,
15530 and a copy adjusted for rounding mode MODE is stored to the slot for MODE. */
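/* The rounding-control field occupies bits 10-11 of the x87 control word:
   00 rounds to nearest even, 01 rounds down (toward -inf), 10 rounds up
   (toward +inf) and 11 truncates toward zero.  The masks below (~0x0c00,
   0x0400, 0x0800, 0x0c00) manipulate exactly that field.  */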
15531
15532static void
15533emit_i387_cw_initialization (int mode)
15534{
15535 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15536 rtx new_mode;
15537
15538 enum ix86_stack_slot slot;
15539
15540 rtx reg = gen_reg_rtx (HImode);
15541
15542 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15543 emit_move_insn (reg, copy_rtx (stored_mode));
15544
15545 switch (mode)
15546 {
15547 case I387_CW_ROUNDEVEN:
15548 /* round to nearest */
15549 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15550 slot = SLOT_CW_ROUNDEVEN;
15551 break;
15552
15553 case I387_CW_TRUNC:
15554 /* round toward zero (truncate) */
15555 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15556 slot = SLOT_CW_TRUNC;
15557 break;
15558
15559 case I387_CW_FLOOR:
15560 /* round down toward -oo */
15561 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15562 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15563 slot = SLOT_CW_FLOOR;
15564 break;
15565
15566 case I387_CW_CEIL:
15567 /* round up toward +oo */
15568 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15569 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15570 slot = SLOT_CW_CEIL;
15571 break;
15572
15573 default:
15574 gcc_unreachable ();
15575 }
15576
15577 gcc_assert (slot < MAX_386_STACK_LOCALS);
15578
15579 new_mode = assign_386_stack_local (HImode, slot);
15580 emit_move_insn (new_mode, reg);
15581}
15582
15583/* Generate one or more insns to set ENTITY to MODE. */
15584
15585static void
15586ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
15587 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
15588{
15589 switch (entity)
15590 {
15591 case X86_DIRFLAG:
15592 if (mode == X86_DIRFLAG_RESET)
15593 emit_insn (gen_cld ());
15594 break;
15595 case AVX_U128:
15596 if (mode == AVX_U128_CLEAN)
15597 ix86_expand_avx_vzeroupper ();
15598 break;
15599 case I387_ROUNDEVEN:
15600 case I387_TRUNC:
15601 case I387_FLOOR:
15602 case I387_CEIL:
15603 if (mode != I387_CW_ANY
15604 && mode != I387_CW_UNINITIALIZED)
15605 emit_i387_cw_initialization (mode);
15606 break;
15607 default:
15608 gcc_unreachable ();
15609 }
15610}
15611
15612/* Output code for INSN to convert a float to a signed int. OPERANDS
15613 are the insn operands. The output may be [HSD]Imode and the input
15614 operand may be [SDX]Fmode. */
15615
15616const char *
15617output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
15618{
15619 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
15620 bool dimode_p = GET_MODE (operands[0]) == DImode;
15621 int round_mode = get_attr_i387_cw (insn);
15622
15623 static char buf[40];
15624 const char *p;
15625
15626 /* Jump through a hoop or two for DImode, since the hardware has no
15627 non-popping instruction. We used to do this a different way, but
15628 that was somewhat fragile and broke with post-reload splitters. */
15629 if ((dimode_p || fisttp) && !stack_top_dies)
15630 output_asm_insn ("fld\t%y1", operands);
15631
15632 gcc_assert (STACK_TOP_P (operands[1]));
15633 gcc_assert (MEM_P (operands[0]));
15634 gcc_assert (GET_MODE (operands[1]) != TFmode);
15635
15636 if (fisttp)
15637 return "fisttp%Z0\t%0";
15638
15639 strcpy (dest: buf, src: "fist");
15640
15641 if (round_mode != I387_CW_ANY)
15642 output_asm_insn ("fldcw\t%3", operands);
15643
15644 p = "p%Z0\t%0";
15645 strcat (dest: buf, src: p + !(stack_top_dies || dimode_p));
15646
15647 output_asm_insn (buf, operands);
15648
15649 if (round_mode != I387_CW_ANY)
15650 output_asm_insn ("fldcw\t%2", operands);
15651
15652 return "";
15653}
15654
15655/* Output code for x87 ffreep insn. The OPNO argument, which may only
15656 have the values zero or one, indicates the ffreep insn's operand
15657 from the OPERANDS array. */
15658
15659static const char *
15660output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15661{
15662 if (TARGET_USE_FFREEP)
15663#ifdef HAVE_AS_IX86_FFREEP
15664 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15665#else
15666 {
15667 static char retval[32];
15668 int regno = REGNO (operands[opno]);
15669
15670 gcc_assert (STACK_REGNO_P (regno));
15671
15672 regno -= FIRST_STACK_REG;
15673
15674 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15675 return retval;
15676 }
15677#endif
15678
15679 return opno ? "fstp\t%y1" : "fstp\t%y0";
15680}
15681
15682
15683/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15684 should be used. UNORDERED_P is true when fucom should be used. */
15685
15686const char *
15687output_fp_compare (rtx_insn *insn, rtx *operands,
15688 bool eflags_p, bool unordered_p)
15689{
15690 rtx *xops = eflags_p ? &operands[0] : &operands[1];
15691 bool stack_top_dies;
15692
15693 static char buf[40];
15694 const char *p;
15695
15696 gcc_assert (STACK_TOP_P (xops[0]));
15697
15698 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
15699
15700 if (eflags_p)
15701 {
15702 p = unordered_p ? "fucomi" : "fcomi";
15703 strcpy (dest: buf, src: p);
15704
15705 p = "p\t{%y1, %0|%0, %y1}";
15706 strcat (dest: buf, src: p + !stack_top_dies);
15707
15708 return buf;
15709 }
15710
15711 if (STACK_REG_P (xops[1])
15712 && stack_top_dies
15713 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
15714 {
15715 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
15716
15717 /* If both the top of the 387 stack and the other operand
15718 (which is also a stack register) die, then this must be
15719 an `fcompp' float compare. */
15720 p = unordered_p ? "fucompp" : "fcompp";
15721 strcpy (dest: buf, src: p);
15722 }
15723 else if (const0_operand (xops[1], VOIDmode))
15724 {
15725 gcc_assert (!unordered_p);
15726 strcpy (dest: buf, src: "ftst");
15727 }
15728 else
15729 {
15730 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
15731 {
15732 gcc_assert (!unordered_p);
15733 p = "ficom";
15734 }
15735 else
15736 p = unordered_p ? "fucom" : "fcom";
15737
15738 strcpy (dest: buf, src: p);
15739
15740 p = "p%Z2\t%y2";
15741 strcat (dest: buf, src: p + !stack_top_dies);
15742 }
15743
15744 output_asm_insn (buf, operands);
15745 return "fnstsw\t%0";
15746}
15747
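/* Output an element of an absolute address vector (jump table): the
   address of local label LPREFIX<VALUE>, typically as a .long, or as a
   .quad on LP64 targets where ASM_QUAD is defined.  */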
15748void
15749ix86_output_addr_vec_elt (FILE *file, int value)
15750{
15751 const char *directive = ASM_LONG;
15752
15753#ifdef ASM_QUAD
15754 if (TARGET_LP64)
15755 directive = ASM_QUAD;
15756#else
15757 gcc_assert (!TARGET_64BIT);
15758#endif
15759
15760 fprintf (stream: file, format: "%s%s%d\n", directive, LPREFIX, value);
15761}
15762
15763void
15764ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15765{
15766 const char *directive = ASM_LONG;
15767
15768#ifdef ASM_QUAD
15769 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15770 directive = ASM_QUAD;
15771#else
15772 gcc_assert (!TARGET_64BIT);
15773#endif
15774 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15775 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15776 fprintf (stream: file, format: "%s%s%d-%s%d\n",
15777 directive, LPREFIX, value, LPREFIX, rel);
15778#if TARGET_MACHO
15779 else if (TARGET_MACHO)
15780 {
15781 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15782 machopic_output_function_base_name (file);
15783 putc ('\n', file);
15784 }
15785#endif
15786 else if (HAVE_AS_GOTOFF_IN_DATA)
15787 fprintf (stream: file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15788 else
15789 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15790 GOT_SYMBOL_NAME, LPREFIX, value);
15791}
15792
15793#define LEA_MAX_STALL (3)
15794#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
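/* LEA_MAX_STALL is measured in full cycles; LEA_SEARCH_THRESHOLD is the
   same bound expressed in half-cycles, the unit in which the distance_*
   helpers below accumulate distances (via increase_distance) before
   converting back to cycles (>> 1) on return.  */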
15795
15796/* Increase the given DISTANCE in half-cycles according to
15797 dependencies between the PREV and NEXT instructions.
15798 Add 1 half-cycle if there is no dependency and
15799 go to the next cycle if there is some dependency. */
15800
15801static unsigned int
15802increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
15803{
15804 df_ref def, use;
15805
15806 if (!prev || !next)
15807 return distance + (distance & 1) + 2;
15808
15809 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
15810 return distance + 1;
15811
15812 FOR_EACH_INSN_USE (use, next)
15813 FOR_EACH_INSN_DEF (def, prev)
15814 if (!DF_REF_IS_ARTIFICIAL (def)
15815 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
15816 return distance + (distance & 1) + 2;
15817
15818 return distance + 1;
15819}
15820
15821/* Function checks if instruction INSN defines register number
15822 REGNO1 or REGNO2. */
15823
15824bool
15825insn_defines_reg (unsigned int regno1, unsigned int regno2,
15826 rtx_insn *insn)
15827{
15828 df_ref def;
15829
15830 FOR_EACH_INSN_DEF (def, insn)
15831 if (DF_REF_REG_DEF_P (def)
15832 && !DF_REF_IS_ARTIFICIAL (def)
15833 && (regno1 == DF_REF_REGNO (def)
15834 || regno2 == DF_REF_REGNO (def)))
15835 return true;
15836
15837 return false;
15838}
15839
15840/* Function checks if instruction INSN uses register number
15841 REGNO as a part of address expression. */
15842
15843static bool
15844insn_uses_reg_mem (unsigned int regno, rtx insn)
15845{
15846 df_ref use;
15847
15848 FOR_EACH_INSN_USE (use, insn)
15849 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
15850 return true;
15851
15852 return false;
15853}
15854
15855/* Search backward for a non-AGU definition of register number REGNO1
15856 or register number REGNO2 in the basic block, starting from instruction
15857 START, up to the head of the basic block or instruction INSN.
15858
15859 Set *FOUND to true if a definition was found and to false
15860 otherwise.
15861
15862 The distance in half-cycles between START and the found instruction or
15863 the head of the BB is added to DISTANCE and returned. */
15864
15865static int
15866distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
15867 rtx_insn *insn, int distance,
15868 rtx_insn *start, bool *found)
15869{
15870 basic_block bb = start ? BLOCK_FOR_INSN (insn: start) : NULL;
15871 rtx_insn *prev = start;
15872 rtx_insn *next = NULL;
15873
15874 *found = false;
15875
15876 while (prev
15877 && prev != insn
15878 && distance < LEA_SEARCH_THRESHOLD)
15879 {
15880 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
15881 {
15882 distance = increase_distance (prev, next, distance);
15883 if (insn_defines_reg (regno1, regno2, insn: prev))
15884 {
15885 if (recog_memoized (insn: prev) < 0
15886 || get_attr_type (prev) != TYPE_LEA)
15887 {
15888 *found = true;
15889 return distance;
15890 }
15891 }
15892
15893 next = prev;
15894 }
15895 if (prev == BB_HEAD (bb))
15896 break;
15897
15898 prev = PREV_INSN (insn: prev);
15899 }
15900
15901 return distance;
15902}
15903
15904/* Search backward for non-agu definition of register number REGNO1
15905 or register number REGNO2 in INSN's basic block until
15906 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15907 2. Reach neighbor BBs boundary, or
15908 3. Reach agu definition.
15909 Returns the distance between the non-agu definition point and INSN.
15910 If no definition point, returns -1. */
15911
15912static int
15913distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15914 rtx_insn *insn)
15915{
15916 basic_block bb = BLOCK_FOR_INSN (insn);
15917 int distance = 0;
15918 bool found = false;
15919
15920 if (insn != BB_HEAD (bb))
15921 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
15922 distance, start: PREV_INSN (insn),
15923 found: &found);
15924
15925 if (!found && distance < LEA_SEARCH_THRESHOLD)
15926 {
15927 edge e;
15928 edge_iterator ei;
15929 bool simple_loop = false;
15930
15931 FOR_EACH_EDGE (e, ei, bb->preds)
15932 if (e->src == bb)
15933 {
15934 simple_loop = true;
15935 break;
15936 }
15937
15938 if (simple_loop)
15939 distance = distance_non_agu_define_in_bb (regno1, regno2,
15940 insn, distance,
15941 BB_END (bb), found: &found);
15942 else
15943 {
15944 int shortest_dist = -1;
15945 bool found_in_bb = false;
15946
15947 FOR_EACH_EDGE (e, ei, bb->preds)
15948 {
15949 int bb_dist
15950 = distance_non_agu_define_in_bb (regno1, regno2,
15951 insn, distance,
15952 BB_END (e->src),
15953 found: &found_in_bb);
15954 if (found_in_bb)
15955 {
15956 if (shortest_dist < 0)
15957 shortest_dist = bb_dist;
15958 else if (bb_dist > 0)
15959 shortest_dist = MIN (bb_dist, shortest_dist);
15960
15961 found = true;
15962 }
15963 }
15964
15965 distance = shortest_dist;
15966 }
15967 }
15968
15969 if (!found)
15970 return -1;
15971
15972 return distance >> 1;
15973}
15974
15975/* Return the distance in half-cycles, added to DISTANCE, between
15976 INSN and the next insn that uses register number REGNO in a
15977 memory address. Return -1 if REGNO is set (redefined) first.
15978
15979 Set *FOUND to true if a register use was found and to
15980 false otherwise.
15981 Set *REDEFINED to true if a register redefinition was
15982 found and to false otherwise. */
15983
15984static int
15985distance_agu_use_in_bb (unsigned int regno,
15986 rtx_insn *insn, int distance, rtx_insn *start,
15987 bool *found, bool *redefined)
15988{
15989 basic_block bb = NULL;
15990 rtx_insn *next = start;
15991 rtx_insn *prev = NULL;
15992
15993 *found = false;
15994 *redefined = false;
15995
15996 if (start != NULL_RTX)
15997 {
15998 bb = BLOCK_FOR_INSN (insn: start);
15999 if (start != BB_HEAD (bb))
16000 /* If insn and start belong to the same bb, set prev to insn,
16001 so the call to increase_distance will increase the distance
16002 between insns by 1. */
16003 prev = insn;
16004 }
16005
16006 while (next
16007 && next != insn
16008 && distance < LEA_SEARCH_THRESHOLD)
16009 {
16010 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
16011 {
16012 distance = increase_distance(prev, next, distance);
16013 if (insn_uses_reg_mem (regno, insn: next))
16014 {
16015 /* Return DISTANCE if OP0 is used in memory
16016 address in NEXT. */
16017 *found = true;
16018 return distance;
16019 }
16020
16021 if (insn_defines_reg (regno1: regno, INVALID_REGNUM, insn: next))
16022 {
16023 /* Return -1 if OP0 is set in NEXT. */
16024 *redefined = true;
16025 return -1;
16026 }
16027
16028 prev = next;
16029 }
16030
16031 if (next == BB_END (bb))
16032 break;
16033
16034 next = NEXT_INSN (insn: next);
16035 }
16036
16037 return distance;
16038}
16039
16040/* Return the distance between INSN and the next insn that uses
16041 register number REGNO0 in a memory address. Return -1 if no such
16042 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16043
16044static int
16045distance_agu_use (unsigned int regno0, rtx_insn *insn)
16046{
16047 basic_block bb = BLOCK_FOR_INSN (insn);
16048 int distance = 0;
16049 bool found = false;
16050 bool redefined = false;
16051
16052 if (insn != BB_END (bb))
16053 distance = distance_agu_use_in_bb (regno: regno0, insn, distance,
16054 start: NEXT_INSN (insn),
16055 found: &found, redefined: &redefined);
16056
16057 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16058 {
16059 edge e;
16060 edge_iterator ei;
16061 bool simple_loop = false;
16062
16063 FOR_EACH_EDGE (e, ei, bb->succs)
16064 if (e->dest == bb)
16065 {
16066 simple_loop = true;
16067 break;
16068 }
16069
16070 if (simple_loop)
16071 distance = distance_agu_use_in_bb (regno: regno0, insn,
16072 distance, BB_HEAD (bb),
16073 found: &found, redefined: &redefined);
16074 else
16075 {
16076 int shortest_dist = -1;
16077 bool found_in_bb = false;
16078 bool redefined_in_bb = false;
16079
16080 FOR_EACH_EDGE (e, ei, bb->succs)
16081 {
16082 int bb_dist
16083 = distance_agu_use_in_bb (regno: regno0, insn,
16084 distance, BB_HEAD (e->dest),
16085 found: &found_in_bb, redefined: &redefined_in_bb);
16086 if (found_in_bb)
16087 {
16088 if (shortest_dist < 0)
16089 shortest_dist = bb_dist;
16090 else if (bb_dist > 0)
16091 shortest_dist = MIN (bb_dist, shortest_dist);
16092
16093 found = true;
16094 }
16095 }
16096
16097 distance = shortest_dist;
16098 }
16099 }
16100
16101 if (!found || redefined)
16102 return -1;
16103
16104 return distance >> 1;
16105}
16106
16107/* Define this macro to tune LEA priority vs ADD; it takes effect when
16108 there is a dilemma of choosing LEA or ADD.
16109 Negative value: ADD is preferred over LEA.
16110 Zero: Neutral.
16111 Positive value: LEA is preferred over ADD. */
16112#define IX86_LEA_PRIORITY 0
16113
16114/* Return true if using the lea INSN has a performance advantage
16115 over a sequence of instructions. The instruction sequence has
16116 SPLIT_COST cycles higher latency than the lea latency. */
16117
16118static bool
16119ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
16120 unsigned int regno2, int split_cost, bool has_scale)
16121{
16122 int dist_define, dist_use;
16123
16124 /* For Atom processors newer than Bonnell, if using a 2-source or
16125 3-source LEA for non-destructive destination purposes, or due to
16126 wanting the ability to use SCALE, the use of LEA is justified. */
16127 if (!TARGET_CPU_P (BONNELL))
16128 {
16129 if (has_scale)
16130 return true;
16131 if (split_cost < 1)
16132 return false;
16133 if (regno0 == regno1 || regno0 == regno2)
16134 return false;
16135 return true;
16136 }
16137
16138 /* Remember recog_data content. */
16139 struct recog_data_d recog_data_save = recog_data;
16140
16141 dist_define = distance_non_agu_define (regno1, regno2, insn);
16142 dist_use = distance_agu_use (regno0, insn);
16143
16144 /* distance_non_agu_define can call get_attr_type which can call
16145 recog_memoized, restore recog_data back to previous content. */
16146 recog_data = recog_data_save;
16147
16148 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
16149 {
16150 /* If there is no non-AGU operand definition, no AGU
16151 operand usage and the split cost is 0, then both the lea
16152 and non-lea variants have the same priority. Currently
16153 we prefer lea for 64-bit code and non-lea for 32-bit
16154 code. */
16155 if (dist_use < 0 && split_cost == 0)
16156 return TARGET_64BIT || IX86_LEA_PRIORITY;
16157 else
16158 return true;
16159 }
16160
16161 /* With a longer definition distance, lea is preferable.
16162 Here we adjust it to take into account the splitting cost and
16163 the lea priority. */
16164 dist_define += split_cost + IX86_LEA_PRIORITY;
16165
16166 /* If there is no use in a memory address then we just check
16167 that the split cost exceeds the AGU stall. */
16168 if (dist_use < 0)
16169 return dist_define > LEA_MAX_STALL;
16170
16171 /* If this insn has both a backward non-AGU dependence and a forward
16172 AGU dependence, the one with the shorter distance takes effect. */
16173 return dist_define >= dist_use;
16174}
16175
16176/* Return true if we need to split op0 = op1 + op2 into a sequence of
16177 move and add to avoid AGU stalls. */
16178
16179bool
16180ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
16181{
16182 unsigned int regno0, regno1, regno2;
16183
16184 /* Check if we need to optimize. */
16185 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16186 return false;
16187
16188 regno0 = true_regnum (operands[0]);
16189 regno1 = true_regnum (operands[1]);
16190 regno2 = true_regnum (operands[2]);
16191
16192 /* We need to split only adds with a non-destructive
16193 destination operand. */
16194 if (regno0 == regno1 || regno0 == regno2)
16195 return false;
16196 else
16197 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost: 1, has_scale: false);
16198}
16199
16200/* Return true if we should emit an lea instruction instead of a mov
16201 instruction. */
16202
16203bool
16204ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
16205{
16206 unsigned int regno0, regno1;
16207
16208 /* Check if we need to optimize. */
16209 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16210 return false;
16211
16212 /* Use lea for reg to reg moves only. */
16213 if (!REG_P (operands[0]) || !REG_P (operands[1]))
16214 return false;
16215
16216 regno0 = true_regnum (operands[0]);
16217 regno1 = true_regnum (operands[1]);
16218
16219 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, split_cost: 0, has_scale: false);
16220}
16221
16222/* Return true if we need to split lea into a sequence of
16223 instructions to avoid AGU stalls during peephole2. */
16224
16225bool
16226ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
16227{
16228 unsigned int regno0, regno1, regno2;
16229 int split_cost;
16230 struct ix86_address parts;
16231 int ok;
16232
16233 /* The "at least two components" test below might not catch simple
16234 move or zero extension insns if parts.base is non-NULL and parts.disp
16235 is const0_rtx as the only components in the address, e.g. if the
16236 register is %rbp or %r13. As this test is much cheaper and moves or
16237 zero extensions are the common case, do this check first. */
16238 if (REG_P (operands[1])
16239 || (SImode_address_operand (operands[1], VOIDmode)
16240 && REG_P (XEXP (operands[1], 0))))
16241 return false;
16242
16243 ok = ix86_decompose_address (addr: operands[1], out: &parts);
16244 gcc_assert (ok);
16245
16246 /* There should be at least two components in the address. */
16247 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
16248 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
16249 return false;
16250
16251 /* We should not split into add if a non-legitimate PIC
16252 operand is used as the displacement. */
16253 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16254 return false;
16255
16256 regno0 = true_regnum (operands[0]);
16257 regno1 = INVALID_REGNUM;
16258 regno2 = INVALID_REGNUM;
16259
16260 if (parts.base)
16261 regno1 = true_regnum (parts.base);
16262 if (parts.index)
16263 regno2 = true_regnum (parts.index);
16264
16265 /* Use add for a = a + b and a = b + a since it is faster and shorter
16266 than lea for most processors. For processors like BONNELL, if
16267 the destination register of LEA holds an actual address which will
16268 be used soon, LEA is better; otherwise ADD is better. */
16269 if (!TARGET_CPU_P (BONNELL)
16270 && parts.scale == 1
16271 && (!parts.disp || parts.disp == const0_rtx)
16272 && (regno0 == regno1 || regno0 == regno2))
16273 return true;
16274
16275 /* Split with -Oz if the encoding requires fewer bytes. */
16276 if (optimize_size > 1
16277 && parts.scale > 1
16278 && !parts.base
16279 && (!parts.disp || parts.disp == const0_rtx))
16280 return true;
16281
16282 /* Check we need to optimize. */
16283 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
16284 return false;
16285
16286 split_cost = 0;
16287
16288 /* Compute how many cycles we will add to execution time
16289 if split lea into a sequence of instructions. */
16290 if (parts.base || parts.index)
16291 {
16292 /* Have to use a mov instruction if the non-destructive
16293 destination form is used. */
16294 if (regno1 != regno0 && regno2 != regno0)
16295 split_cost += 1;
16296
16297 /* Have to add index to base if both exist. */
16298 if (parts.base && parts.index)
16299 split_cost += 1;
16300
16301 /* Have to use shift and adds if scale is 2 or greater. */
16302 if (parts.scale > 1)
16303 {
16304 if (regno0 != regno1)
16305 split_cost += 1;
16306 else if (regno2 == regno0)
16307 split_cost += 4;
16308 else
16309 split_cost += parts.scale;
16310 }
16311
16312 /* Have to use an add instruction with an immediate if
16313 disp is non-zero. */
16314 if (parts.disp && parts.disp != const0_rtx)
16315 split_cost += 1;
16316
16317 /* Subtract the price of lea. */
16318 split_cost -= 1;
16319 }
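  /* For example, splitting "lea 0x4(%rbx,%rcx), %rax" needs a mov (the
     destination differs from both sources), an add of the index to the
     base and an add of the displacement; subtracting the lea itself
     gives a split_cost of 2.  */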
16320
16321 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
16322 has_scale: parts.scale > 1);
16323}
16324
16325/* Return true if it is ok to optimize an ADD operation to an LEA
16326 operation to avoid flag register consumption. For most processors,
16327 ADD is faster than LEA. For processors like BONNELL, if the
16328 destination register of LEA holds an actual address which will be
16329 used soon, LEA is better; otherwise ADD is better. */
16330
16331bool
16332ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
16333{
16334 unsigned int regno0 = true_regnum (operands[0]);
16335 unsigned int regno1 = true_regnum (operands[1]);
16336 unsigned int regno2 = true_regnum (operands[2]);
16337
16338 /* If a = b + c, (a!=b && a!=c), we must use the lea form. */
16339 if (regno0 != regno1 && regno0 != regno2)
16340 return true;
16341
16342 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16343 return false;
16344
16345 return ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost: 0, has_scale: false);
16346}
16347
16348/* Return true if the destination reg of SET_BODY is the shift count of
16349 USE_BODY. */
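/* For example, this holds when SET_BODY sets %ecx and USE_BODY is a
   shift or rotate whose count operand is that register, as in
   "sall %cl, %eax".  */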
16350
16351static bool
16352ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16353{
16354 rtx set_dest;
16355 rtx shift_rtx;
16356 int i;
16357
16358 /* Retrieve destination of SET_BODY. */
16359 switch (GET_CODE (set_body))
16360 {
16361 case SET:
16362 set_dest = SET_DEST (set_body);
16363 if (!set_dest || !REG_P (set_dest))
16364 return false;
16365 break;
16366 case PARALLEL:
16367 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16368 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16369 use_body))
16370 return true;
16371 /* FALLTHROUGH */
16372 default:
16373 return false;
16374 }
16375
16376 /* Retrieve shift count of USE_BODY. */
16377 switch (GET_CODE (use_body))
16378 {
16379 case SET:
16380 shift_rtx = XEXP (use_body, 1);
16381 break;
16382 case PARALLEL:
16383 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16384 if (ix86_dep_by_shift_count_body (set_body,
16385 XVECEXP (use_body, 0, i)))
16386 return true;
16387 /* FALLTHROUGH */
16388 default:
16389 return false;
16390 }
16391
16392 if (shift_rtx
16393 && (GET_CODE (shift_rtx) == ASHIFT
16394 || GET_CODE (shift_rtx) == LSHIFTRT
16395 || GET_CODE (shift_rtx) == ASHIFTRT
16396 || GET_CODE (shift_rtx) == ROTATE
16397 || GET_CODE (shift_rtx) == ROTATERT))
16398 {
16399 rtx shift_count = XEXP (shift_rtx, 1);
16400
16401 /* Return true if shift count is dest of SET_BODY. */
16402 if (REG_P (shift_count))
16403 {
16404 /* Add this check since the function can be invoked before register
16405 allocation by the pre-reload scheduler. */
16406 if (reload_completed
16407 && true_regnum (set_dest) == true_regnum (shift_count))
16408 return true;
16409 else if (REGNO(set_dest) == REGNO(shift_count))
16410 return true;
16411 }
16412 }
16413
16414 return false;
16415}
16416
16417/* Return true if the destination reg of SET_INSN is the shift count of
16418 USE_INSN. */
16419
16420bool
16421ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16422{
16423 return ix86_dep_by_shift_count_body (set_body: PATTERN (insn: set_insn),
16424 use_body: PATTERN (insn: use_insn));
16425}
16426
16427/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16428 are ok, keeping in mind the possible movddup alternative. */
16429
16430bool
16431ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16432{
16433 if (MEM_P (operands[0]))
16434 return rtx_equal_p (operands[0], operands[1 + high]);
16435 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16436 return false;
16437 return true;
16438}
16439
16440/* A subroutine of ix86_build_signbit_mask. If VECT is true,
16441 then replicate the value for all elements of the vector
16442 register. */
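/* For instance, called with V4SFmode and VECT true this yields the
   CONST_VECTOR {VALUE, VALUE, VALUE, VALUE}; with VECT false only
   element 0 is VALUE and the remaining elements are zero.  */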
16443
16444rtx
16445ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
16446{
16447 int i, n_elt;
16448 rtvec v;
16449 machine_mode scalar_mode;
16450
16451 switch (mode)
16452 {
16453 case E_V64QImode:
16454 case E_V32QImode:
16455 case E_V16QImode:
16456 case E_V32HImode:
16457 case E_V16HImode:
16458 case E_V8HImode:
16459 case E_V16SImode:
16460 case E_V8SImode:
16461 case E_V4SImode:
16462 case E_V2SImode:
16463 case E_V8DImode:
16464 case E_V4DImode:
16465 case E_V2DImode:
16466 gcc_assert (vect);
16467 /* FALLTHRU */
16468 case E_V2HFmode:
16469 case E_V4HFmode:
16470 case E_V8HFmode:
16471 case E_V16HFmode:
16472 case E_V32HFmode:
16473 case E_V16SFmode:
16474 case E_V8SFmode:
16475 case E_V4SFmode:
16476 case E_V2SFmode:
16477 case E_V8DFmode:
16478 case E_V4DFmode:
16479 case E_V2DFmode:
16480 case E_V32BFmode:
16481 case E_V16BFmode:
16482 case E_V8BFmode:
16483 case E_V4BFmode:
16484 case E_V2BFmode:
16485 n_elt = GET_MODE_NUNITS (mode);
16486 v = rtvec_alloc (n_elt);
16487 scalar_mode = GET_MODE_INNER (mode);
16488
16489 RTVEC_ELT (v, 0) = value;
16490
16491 for (i = 1; i < n_elt; ++i)
16492 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
16493
16494 return gen_rtx_CONST_VECTOR (mode, v);
16495
16496 default:
16497 gcc_unreachable ();
16498 }
16499}
16500
16501/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16502 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16503 for an SSE register. If VECT is true, then replicate the mask for
16504 all elements of the vector register. If INVERT is true, then create
16505 a mask excluding the sign bit. */
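/* E.g. for V4SFmode this yields a vector constant whose elements have
   only the sign bit set (the bit pattern of -0.0f), or, with INVERT,
   every bit except the sign bit set.  */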
16506
16507rtx
16508ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
16509{
16510 machine_mode vec_mode, imode;
16511 wide_int w;
16512 rtx mask, v;
16513
16514 switch (mode)
16515 {
16516 case E_V2HFmode:
16517 case E_V4HFmode:
16518 case E_V8HFmode:
16519 case E_V16HFmode:
16520 case E_V32HFmode:
16521 case E_V32BFmode:
16522 case E_V16BFmode:
16523 case E_V8BFmode:
16524 case E_V4BFmode:
16525 case E_V2BFmode:
16526 vec_mode = mode;
16527 imode = HImode;
16528 break;
16529
16530 case E_V16SImode:
16531 case E_V16SFmode:
16532 case E_V8SImode:
16533 case E_V4SImode:
16534 case E_V8SFmode:
16535 case E_V4SFmode:
16536 case E_V2SFmode:
16537 case E_V2SImode:
16538 vec_mode = mode;
16539 imode = SImode;
16540 break;
16541
16542 case E_V8DImode:
16543 case E_V4DImode:
16544 case E_V2DImode:
16545 case E_V8DFmode:
16546 case E_V4DFmode:
16547 case E_V2DFmode:
16548 vec_mode = mode;
16549 imode = DImode;
16550 break;
16551
16552 case E_TImode:
16553 case E_TFmode:
16554 vec_mode = VOIDmode;
16555 imode = TImode;
16556 break;
16557
16558 default:
16559 gcc_unreachable ();
16560 }
16561
16562 machine_mode inner_mode = GET_MODE_INNER (mode);
16563 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
16564 GET_MODE_BITSIZE (inner_mode));
16565 if (invert)
16566 w = wi::bit_not (x: w);
16567
16568 /* Force this value into the low part of a fp vector constant. */
16569 mask = immed_wide_int_const (w, imode);
16570 mask = gen_lowpart (inner_mode, mask);
16571
16572 if (vec_mode == VOIDmode)
16573 return force_reg (inner_mode, mask);
16574
16575 v = ix86_build_const_vector (mode: vec_mode, vect, value: mask);
16576 return force_reg (vec_mode, v);
16577}
16578
16579/* Return HOST_WIDE_INT for const vector OP in MODE. */
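/* Elements are packed starting at the least significant bits, so e.g.
   the V4QImode vector {1, 2, 3, 4} becomes the integer 0x04030201.  */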
16580
16581HOST_WIDE_INT
16582ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
16583{
16584 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16585 gcc_unreachable ();
16586
16587 int nunits = GET_MODE_NUNITS (mode);
16588 wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
16589 machine_mode innermode = GET_MODE_INNER (mode);
16590 unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
16591
16592 switch (mode)
16593 {
16594 case E_V2QImode:
16595 case E_V4QImode:
16596 case E_V2HImode:
16597 case E_V8QImode:
16598 case E_V4HImode:
16599 case E_V2SImode:
16600 for (int i = 0; i < nunits; ++i)
16601 {
16602 int v = INTVAL (XVECEXP (op, 0, i));
16603 wide_int wv = wi::shwi (val: v, precision: innermode_bits);
16604 val = wi::insert (x: val, y: wv, innermode_bits * i, innermode_bits);
16605 }
16606 break;
16607 case E_V2HFmode:
16608 case E_V2BFmode:
16609 case E_V4HFmode:
16610 case E_V4BFmode:
16611 case E_V2SFmode:
16612 for (int i = 0; i < nunits; ++i)
16613 {
16614 rtx x = XVECEXP (op, 0, i);
16615 int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
16616 REAL_MODE_FORMAT (innermode));
16617 wide_int wv = wi::shwi (val: v, precision: innermode_bits);
16618 val = wi::insert (x: val, y: wv, innermode_bits * i, innermode_bits);
16619 }
16620 break;
16621 default:
16622 gcc_unreachable ();
16623 }
16624
16625 return val.to_shwi ();
16626}
16627
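/* Map an RTX comparison code to the corresponding X86_CC* condition-code
   value, or return -1 if the comparison has no direct encoding.  */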
16628int ix86_get_flags_cc (rtx_code code)
16629{
16630 switch (code)
16631 {
16632 case NE: return X86_CCNE;
16633 case EQ: return X86_CCE;
16634 case GE: return X86_CCNL;
16635 case GT: return X86_CCNLE;
16636 case LE: return X86_CCLE;
16637 case LT: return X86_CCL;
16638 case GEU: return X86_CCNB;
16639 case GTU: return X86_CCNBE;
16640 case LEU: return X86_CCBE;
16641 case LTU: return X86_CCB;
16642 default: return -1;
16643 }
16644}
16645
16646/* Return TRUE or FALSE depending on whether the first SET in INSN
16647 has source and destination with matching CC modes, and whether the
16648 CC mode is at least as constrained as REQ_MODE. */
16649
16650bool
16651ix86_match_ccmode (rtx insn, machine_mode req_mode)
16652{
16653 rtx set;
16654 machine_mode set_mode;
16655
16656 set = PATTERN (insn);
16657 if (GET_CODE (set) == PARALLEL)
16658 set = XVECEXP (set, 0, 0);
16659 gcc_assert (GET_CODE (set) == SET);
16660 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16661
16662 set_mode = GET_MODE (SET_DEST (set));
16663 switch (set_mode)
16664 {
16665 case E_CCNOmode:
16666 if (req_mode != CCNOmode
16667 && (req_mode != CCmode
16668 || XEXP (SET_SRC (set), 1) != const0_rtx))
16669 return false;
16670 break;
16671 case E_CCmode:
16672 if (req_mode == CCGCmode)
16673 return false;
16674 /* FALLTHRU */
16675 case E_CCGCmode:
16676 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16677 return false;
16678 /* FALLTHRU */
16679 case E_CCGOCmode:
16680 if (req_mode == CCZmode)
16681 return false;
16682 /* FALLTHRU */
16683 case E_CCZmode:
16684 break;
16685
16686 case E_CCGZmode:
16687
16688 case E_CCAmode:
16689 case E_CCCmode:
16690 case E_CCOmode:
16691 case E_CCPmode:
16692 case E_CCSmode:
16693 if (set_mode != req_mode)
16694 return false;
16695 break;
16696
16697 default:
16698 gcc_unreachable ();
16699 }
16700
16701 return GET_MODE (SET_SRC (set)) == set_mode;
16702}
16703
16704machine_mode
16705ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16706{
16707 machine_mode mode = GET_MODE (op0);
16708
16709 if (SCALAR_FLOAT_MODE_P (mode))
16710 {
16711 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16712 return CCFPmode;
16713 }
16714
16715 switch (code)
16716 {
16717 /* Only zero flag is needed. */
16718 case EQ: /* ZF=0 */
16719 case NE: /* ZF!=0 */
16720 return CCZmode;
16721 /* Codes needing carry flag. */
16722 case GEU: /* CF=0 */
16723 case LTU: /* CF=1 */
16724 rtx geu;
16725 /* Detect overflow checks. They need just the carry flag. */
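      /* E.g. the unsigned overflow test "x + y < x" is a compare of
	 (plus x y) against x, where only the carry flag matters.  */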
16726 if (GET_CODE (op0) == PLUS
16727 && (rtx_equal_p (op1, XEXP (op0, 0))
16728 || rtx_equal_p (op1, XEXP (op0, 1))))
16729 return CCCmode;
16730 /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
16731 Match LTU of op0
16732 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
16733 and op1
16734 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
16735 where CC_CCC is either CC or CCC. */
16736 else if (code == LTU
16737 && GET_CODE (op0) == NEG
16738 && GET_CODE (geu = XEXP (op0, 0)) == GEU
16739 && REG_P (XEXP (geu, 0))
16740 && (GET_MODE (XEXP (geu, 0)) == CCCmode
16741 || GET_MODE (XEXP (geu, 0)) == CCmode)
16742 && REGNO (XEXP (geu, 0)) == FLAGS_REG
16743 && XEXP (geu, 1) == const0_rtx
16744 && GET_CODE (op1) == LTU
16745 && REG_P (XEXP (op1, 0))
16746 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
16747 && REGNO (XEXP (op1, 0)) == FLAGS_REG
16748 && XEXP (op1, 1) == const0_rtx)
16749 return CCCmode;
16750 /* Similarly for *x86_cmc pattern.
16751 Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
16752 and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
16753 It is sufficient to test that the operand modes are CCCmode. */
16754 else if (code == LTU
16755 && GET_CODE (op0) == NEG
16756 && GET_CODE (XEXP (op0, 0)) == LTU
16757 && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
16758 && GET_CODE (op1) == GEU
16759 && GET_MODE (XEXP (op1, 0)) == CCCmode)
16760 return CCCmode;
16761 /* Similarly for the comparison of addcarry/subborrow pattern. */
16762 else if (code == LTU
16763 && GET_CODE (op0) == ZERO_EXTEND
16764 && GET_CODE (op1) == PLUS
16765 && ix86_carry_flag_operator (XEXP (op1, 0), VOIDmode)
16766 && GET_CODE (XEXP (op1, 1)) == ZERO_EXTEND)
16767 return CCCmode;
16768 else
16769 return CCmode;
16770 case GTU: /* CF=0 & ZF=0 */
16771 case LEU: /* CF=1 | ZF=1 */
16772 return CCmode;
16773 /* Codes possibly doable only with sign flag when
16774 comparing against zero. */
16775 case GE: /* SF=OF or SF=0 */
16776 case LT: /* SF<>OF or SF=1 */
16777 if (op1 == const0_rtx)
16778 return CCGOCmode;
16779 else
16780 /* For other cases Carry flag is not required. */
16781 return CCGCmode;
16782 /* Codes doable only with the sign flag when comparing
16783 against zero, but we lack a jump instruction for that,
16784 so we need to use relational tests against overflow,
16785 which thus needs to be zero. */
16786 case GT: /* ZF=0 & SF=OF */
16787 case LE: /* ZF=1 | SF<>OF */
16788 if (op1 == const0_rtx)
16789 return CCNOmode;
16790 else
16791 return CCGCmode;
16792 default:
16793 /* CCmode should be used in all other cases. */
16794 return CCmode;
16795 }
16796}
16797
16798/* Return TRUE or FALSE depending on whether the ptest instruction
16799 INSN has source and destination with suitable matching CC modes. */
16800
16801bool
16802ix86_match_ptest_ccmode (rtx insn)
16803{
16804 rtx set, src;
16805 machine_mode set_mode;
16806
16807 set = PATTERN (insn);
16808 gcc_assert (GET_CODE (set) == SET);
16809 src = SET_SRC (set);
16810 gcc_assert (GET_CODE (src) == UNSPEC
16811 && XINT (src, 1) == UNSPEC_PTEST);
16812
16813 set_mode = GET_MODE (src);
16814 if (set_mode != CCZmode
16815 && set_mode != CCCmode
16816 && set_mode != CCmode)
16817 return false;
16818 return GET_MODE (SET_DEST (set)) == set_mode;
16819}
16820
16821/* Return the fixed registers used for condition codes. */
16822
16823static bool
16824ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16825{
16826 *p1 = FLAGS_REG;
16827 *p2 = INVALID_REGNUM;
16828 return true;
16829}
16830
16831/* If two condition code modes are compatible, return a condition code
16832 mode which is compatible with both. Otherwise, return
16833 VOIDmode. */
16834
16835static machine_mode
16836ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
16837{
16838 if (m1 == m2)
16839 return m1;
16840
16841 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16842 return VOIDmode;
16843
16844 if ((m1 == CCGCmode && m2 == CCGOCmode)
16845 || (m1 == CCGOCmode && m2 == CCGCmode))
16846 return CCGCmode;
16847
16848 if ((m1 == CCNOmode && m2 == CCGOCmode)
16849 || (m1 == CCGOCmode && m2 == CCNOmode))
16850 return CCNOmode;
16851
16852 if (m1 == CCZmode
16853 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
16854 return m2;
16855 else if (m2 == CCZmode
16856 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
16857 return m1;
16858
16859 switch (m1)
16860 {
16861 default:
16862 gcc_unreachable ();
16863
16864 case E_CCmode:
16865 case E_CCGCmode:
16866 case E_CCGOCmode:
16867 case E_CCNOmode:
16868 case E_CCAmode:
16869 case E_CCCmode:
16870 case E_CCOmode:
16871 case E_CCPmode:
16872 case E_CCSmode:
16873 case E_CCZmode:
16874 switch (m2)
16875 {
16876 default:
16877 return VOIDmode;
16878
16879 case E_CCmode:
16880 case E_CCGCmode:
16881 case E_CCGOCmode:
16882 case E_CCNOmode:
16883 case E_CCAmode:
16884 case E_CCCmode:
16885 case E_CCOmode:
16886 case E_CCPmode:
16887 case E_CCSmode:
16888 case E_CCZmode:
16889 return CCmode;
16890 }
16891
16892 case E_CCFPmode:
16893 /* These are only compatible with themselves, which we already
16894 checked above. */
16895 return VOIDmode;
16896 }
16897}
16898
16899/* Return strategy to use for floating-point. We assume that fcomi is always
16900 preferable where available, since that is also true when looking at size
16901 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16902
16903enum ix86_fpcmp_strategy
16904ix86_fp_comparison_strategy (enum rtx_code)
16905{
16906 /* Do fcomi/sahf based test when profitable. */
16907
16908 if (TARGET_CMOVE)
16909 return IX86_FPCMP_COMI;
16910
16911 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
16912 return IX86_FPCMP_SAHF;
16913
16914 return IX86_FPCMP_ARITH;
16915}
16916
16917/* Convert comparison codes we use to represent FP comparison to integer
16918 code that will result in proper branch. Return UNKNOWN if no such code
16919 is available. */
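/* For instance, after a COMI-style compare ZF is set when the operands are
   equal or unordered, which is why UNEQ maps to EQ below.  */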
16920
16921enum rtx_code
16922ix86_fp_compare_code_to_integer (enum rtx_code code)
16923{
16924 switch (code)
16925 {
16926 case GT:
16927 return GTU;
16928 case GE:
16929 return GEU;
16930 case ORDERED:
16931 case UNORDERED:
16932 return code;
16933 case UNEQ:
16934 return EQ;
16935 case UNLT:
16936 return LTU;
16937 case UNLE:
16938 return LEU;
16939 case LTGT:
16940 return NE;
16941 case EQ:
16942 case NE:
16943 if (TARGET_AVX10_2)
16944 return code;
16945 /* FALLTHRU. */
16946 default:
16947 return UNKNOWN;
16948 }
16949}
16950
16951/* Zero extend possibly SImode EXP to Pmode register. */
16952rtx
16953ix86_zero_extend_to_Pmode (rtx exp)
16954{
16955 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
16956}
16957
16958/* Return true if the function is called via PLT. */
16959
16960bool
16961ix86_call_use_plt_p (rtx call_op)
16962{
16963 if (SYMBOL_REF_LOCAL_P (call_op))
16964 {
16965 if (SYMBOL_REF_DECL (call_op)
16966 && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
16967 {
16968 /* NB: All ifunc functions must be called via PLT. */
16969 cgraph_node *node
16970 = cgraph_node::get (SYMBOL_REF_DECL (call_op));
16971 if (node && node->ifunc_resolver)
16972 return true;
16973 }
16974 return false;
16975 }
16976 return true;
16977}
16978
16979/* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true,
16980 the PLT entry will be used as the function address for local IFUNC
16981 functions. When the PIC register is needed for PLT call, indirect
16982 call via the PLT entry will fail since the PIC register may not be
16983 set up properly for indirect call. In this case, we should return
16984 false. */
16985
16986static bool
16987ix86_ifunc_ref_local_ok (void)
16988{
16989 return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
16990}
16991
16992/* Return true if the function being called was marked with attribute
16993 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
16994 to handle the non-PIC case in the backend because there is no easy
16995 interface for the front-end to force non-PLT calls to use the GOT.
16996 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
16997 to call the function marked "noplt" indirectly. */
16998
16999bool
17000ix86_nopic_noplt_attribute_p (rtx call_op)
17001{
17002 if (flag_pic || ix86_cmodel == CM_LARGE
17003 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
17004 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
17005 || SYMBOL_REF_LOCAL_P (call_op))
17006 return false;
17007
17008 tree symbol_decl = SYMBOL_REF_DECL (call_op);
17009
17010 if (!flag_plt
17011 || (symbol_decl != NULL_TREE
17012 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
17013 return true;
17014
17015 return false;
17016}
17017
17018/* Helper to output the jmp/call. */
17019static void
17020ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
17021{
17022 if (thunk_name != NULL)
17023 {
17024 if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
17025 && ix86_indirect_branch_cs_prefix)
17026 fprintf (asm_out_file, "\tcs\n");
17027 fprintf (asm_out_file, "\tjmp\t");
17028 assemble_name (asm_out_file, thunk_name);
17029 putc ('\n', asm_out_file);
17030 if ((ix86_harden_sls & harden_sls_indirect_jmp))
17031 fputs ("\tint3\n", asm_out_file);
17032 }
17033 else
17034 output_indirect_thunk (regno);
17035}
17036
17037/* Output indirect branch via a call and return thunk. CALL_OP is a
17038 register which contains the branch target. XASM is the assembly
17039 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
17040 A normal call is converted to:
17041
17042 call __x86_indirect_thunk_reg
17043
17044 and a tail call is converted to:
17045
17046 jmp __x86_indirect_thunk_reg
17047 */
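/* The thunk body itself is emitted elsewhere (output_indirect_thunk or the
   shared __x86_indirect_thunk_* symbols); it performs the actual transfer
   through the register using a retpoline-style sequence instead of a plain
   indirect jump.  */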
17048
17049static void
17050ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
17051{
17052 char thunk_name_buf[32];
17053 char *thunk_name;
17054 enum indirect_thunk_prefix need_prefix
17055 = indirect_thunk_need_prefix (current_output_insn);
17056 int regno = REGNO (call_op);
17057
17058 if (cfun->machine->indirect_branch_type
17059 != indirect_branch_thunk_inline)
17060 {
17061 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
17062 SET_HARD_REG_BIT (indirect_thunks_used, regno);
17063
17064 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
17065 thunk_name = thunk_name_buf;
17066 }
17067 else
17068 thunk_name = NULL;
17069
17070 if (sibcall_p)
17071 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17072 else
17073 {
17074 if (thunk_name != NULL)
17075 {
17076 if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
17077 && ix86_indirect_branch_cs_prefix)
17078 fprintf (asm_out_file, "\tcs\n");
17079 fprintf (asm_out_file, "\tcall\t");
17080 assemble_name (asm_out_file, thunk_name);
17081 putc ('\n', asm_out_file);
17082 return;
17083 }
17084
17085 char indirectlabel1[32];
17086 char indirectlabel2[32];
17087
17088 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
17089 INDIRECT_LABEL,
17090 indirectlabelno++);
17091 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
17092 INDIRECT_LABEL,
17093 indirectlabelno++);
17094
17095 /* Jump. */
17096 fputs ("\tjmp\t", asm_out_file);
17097 assemble_name_raw (asm_out_file, indirectlabel2);
17098 fputc ('\n', asm_out_file);
17099
17100 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
17101
17102 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17103
17104 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
17105
17106 /* Call. */
17107 fputs ("\tcall\t", asm_out_file);
17108 assemble_name_raw (asm_out_file, indirectlabel1);
17109 fputc ('\n', asm_out_file);
17110 }
17111}
17112
17113/* Output indirect branch via a call and return thunk. CALL_OP is
17114 the branch target. XASM is the assembly template for CALL_OP.
17115 Branch is a tail call if SIBCALL_P is true. A normal call is
17116 converted to:
17117
17118 jmp L2
17119 L1:
17120 push CALL_OP
17121 jmp __x86_indirect_thunk
17122 L2:
17123 call L1
17124
17125 and a tail call is converted to:
17126
17127 push CALL_OP
17128 jmp __x86_indirect_thunk
17129 */
17130
17131static void
17132ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
17133 bool sibcall_p)
17134{
17135 char thunk_name_buf[32];
17136 char *thunk_name;
17137 char push_buf[64];
17138 enum indirect_thunk_prefix need_prefix
17139 = indirect_thunk_need_prefix (current_output_insn);
17140 int regno = -1;
17141
17142 if (cfun->machine->indirect_branch_type
17143 != indirect_branch_thunk_inline)
17144 {
17145 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
17146 indirect_thunk_needed = true;
17147 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
17148 thunk_name = thunk_name_buf;
17149 }
17150 else
17151 thunk_name = NULL;
17152
17153 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
17154 TARGET_64BIT ? 'q' : 'l', xasm);
17155
17156 if (sibcall_p)
17157 {
17158 output_asm_insn (push_buf, &call_op);
17159 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17160 }
17161 else
17162 {
17163 char indirectlabel1[32];
17164 char indirectlabel2[32];
17165
17166 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
17167 INDIRECT_LABEL,
17168 indirectlabelno++);
17169 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
17170 INDIRECT_LABEL,
17171 indirectlabelno++);
17172
17173 /* Jump. */
17174 fputs ("\tjmp\t", asm_out_file);
17175 assemble_name_raw (asm_out_file, indirectlabel2);
17176 fputc ('\n', asm_out_file);
17177
17178 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
17179
17180 /* An external function may be called via GOT, instead of PLT. */
17181 if (MEM_P (call_op))
17182 {
17183 struct ix86_address parts;
17184 rtx addr = XEXP (call_op, 0);
17185 if (ix86_decompose_address (addr, &parts)
17186 && parts.base == stack_pointer_rtx)
17187 {
17188 /* Since call will adjust stack by -UNITS_PER_WORD,
17189 we must convert "disp(stack, index, scale)" to
17190 "disp+UNITS_PER_WORD(stack, index, scale)". */
17191 if (parts.index)
17192 {
17193 addr = gen_rtx_MULT (Pmode, parts.index,
17194 GEN_INT (parts.scale));
17195 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17196 addr);
17197 }
17198 else
17199 addr = stack_pointer_rtx;
17200
17201 rtx disp;
17202 if (parts.disp != NULL_RTX)
17203 disp = plus_constant (Pmode, parts.disp,
17204 UNITS_PER_WORD);
17205 else
17206 disp = GEN_INT (UNITS_PER_WORD);
17207
17208 addr = gen_rtx_PLUS (Pmode, addr, disp);
17209 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
17210 }
17211 }
17212
17213 output_asm_insn (push_buf, &call_op);
17214
17215 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17216
17217 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
17218
17219 /* Call. */
17220 fputs ("\tcall\t", asm_out_file);
17221 assemble_name_raw (asm_out_file, indirectlabel1);
17222 fputc ('\n', asm_out_file);
17223 }
17224}
17225
17226/* Output indirect branch via a call and return thunk. CALL_OP is
17227 the branch target. XASM is the assembly template for CALL_OP.
17228 Branch is a tail call if SIBCALL_P is true. */
17229
17230static void
17231ix86_output_indirect_branch (rtx call_op, const char *xasm,
17232 bool sibcall_p)
17233{
17234 if (REG_P (call_op))
17235 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
17236 else
17237 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
17238}
17239
17240/* Output indirect jump. CALL_OP is the jump target. */
17241
17242const char *
17243ix86_output_indirect_jmp (rtx call_op)
17244{
17245 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
17246 {
17247 /* We can't have red-zone since "call" in the indirect thunk
17248 pushes the return address onto stack, destroying red-zone. */
17249 if (ix86_red_zone_used)
17250 gcc_unreachable ();
17251
17252 ix86_output_indirect_branch (call_op, "%0", true);
17253 }
17254 else
17255 output_asm_insn ("%!jmp\t%A0", &call_op);
17256 return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
17257}
17258
17259/* Output return instrumentation for current function if needed. */
17260
17261static void
17262output_return_instrumentation (void)
17263{
17264 if (ix86_instrument_return != instrument_return_none
17265 && flag_fentry
17266 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
17267 {
17268 if (ix86_flag_record_return)
17269 fprintf (asm_out_file, "1:\n");
17270 switch (ix86_instrument_return)
17271 {
17272 case instrument_return_call:
17273 fprintf (asm_out_file, "\tcall\t__return__\n");
17274 break;
17275 case instrument_return_nop5:
17276 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
17277 fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
17278 break;
17279 case instrument_return_none:
17280 break;
17281 }
17282
17283 if (ix86_flag_record_return)
17284 {
17285 fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
17286 fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
17287 fprintf (asm_out_file, "\t.previous\n");
17288 }
17289 }
17290}
17291
17292/* Output function return. CALL_OP is the jump target. Add a REP
17293 prefix to RET if LONG_P is true and function return is kept. */
17294
17295const char *
17296ix86_output_function_return (bool long_p)
17297{
17298 output_return_instrumentation ();
17299
17300 if (cfun->machine->function_return_type != indirect_branch_keep)
17301 {
17302 char thunk_name[32];
17303 enum indirect_thunk_prefix need_prefix
17304 = indirect_thunk_need_prefix (current_output_insn);
17305
17306 if (cfun->machine->function_return_type
17307 != indirect_branch_thunk_inline)
17308 {
17309 bool need_thunk = (cfun->machine->function_return_type
17310 == indirect_branch_thunk);
17311 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
17312 true);
17313 indirect_return_needed |= need_thunk;
17314 fprintf (asm_out_file, "\tjmp\t");
17315 assemble_name (asm_out_file, thunk_name);
17316 putc ('\n', asm_out_file);
17317 }
17318 else
17319 output_indirect_thunk (INVALID_REGNUM);
17320
17321 return "";
17322 }
17323
17324 output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
17325 return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
17326}
17327
17328/* Output indirect function return. RET_OP is the function return
17329 target. */
17330
17331const char *
17332ix86_output_indirect_function_return (rtx ret_op)
17333{
17334 if (cfun->machine->function_return_type != indirect_branch_keep)
17335 {
17336 char thunk_name[32];
17337 enum indirect_thunk_prefix need_prefix
17338 = indirect_thunk_need_prefix (current_output_insn);
17339 unsigned int regno = REGNO (ret_op);
17340 gcc_assert (regno == CX_REG);
17341
17342 if (cfun->machine->function_return_type
17343 != indirect_branch_thunk_inline)
17344 {
17345 bool need_thunk = (cfun->machine->function_return_type
17346 == indirect_branch_thunk);
17347 indirect_thunk_name (thunk_name, regno, need_prefix, true);
17348
17349 if (need_thunk)
17350 {
17351 indirect_return_via_cx = true;
17352 SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
17353 }
17354 fprintf (asm_out_file, "\tjmp\t");
17355 assemble_name (asm_out_file, thunk_name);
17356 putc ('\n', asm_out_file);
17357 }
17358 else
17359 output_indirect_thunk (regno);
17360 }
17361 else
17362 {
17363 output_asm_insn ("%!jmp\t%A0", &ret_op);
17364 if (ix86_harden_sls & harden_sls_indirect_jmp)
17365 fputs ("\tint3\n", asm_out_file);
17366 }
17367 return "";
17368}
17369
17370/* Output the assembly for a call instruction. */
17371
17372const char *
17373ix86_output_call_insn (rtx_insn *insn, rtx call_op)
17374{
17375 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
17376 bool output_indirect_p
17377 = (!TARGET_SEH
17378 && cfun->machine->indirect_branch_type != indirect_branch_keep);
17379 bool seh_nop_p = false;
17380 const char *xasm;
17381
17382 if (SIBLING_CALL_P (insn))
17383 {
17384 output_return_instrumentation ();
17385 if (direct_p)
17386 {
17387 if (ix86_nopic_noplt_attribute_p (call_op))
17388 {
17389 direct_p = false;
17390 if (TARGET_64BIT)
17391 {
17392 if (output_indirect_p)
17393 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17394 else
17395 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17396 }
17397 else
17398 {
17399 if (output_indirect_p)
17400 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
17401 else
17402 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
17403 }
17404 }
17405 else
17406 xasm = "%!jmp\t%P0";
17407 }
17408 /* SEH epilogue detection requires the indirect branch case
17409 to include REX.W. */
17410 else if (TARGET_SEH)
17411 xasm = "%!rex.W jmp\t%A0";
17412 else
17413 {
17414 if (output_indirect_p)
17415 xasm = "%0";
17416 else
17417 xasm = "%!jmp\t%A0";
17418 }
17419
17420 if (output_indirect_p && !direct_p)
17421 ix86_output_indirect_branch (call_op, xasm, true);
17422 else
17423 {
17424 output_asm_insn (xasm, &call_op);
17425 if (!direct_p
17426 && (ix86_harden_sls & harden_sls_indirect_jmp))
17427 return "int3";
17428 }
17429 return "";
17430 }
17431
17432 /* SEH unwinding can require an extra nop to be emitted in several
17433 circumstances. Determine if we have one of those. */
17434 if (TARGET_SEH)
17435 {
17436 rtx_insn *i;
17437
17438 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
17439 {
17440 /* Prevent a catch region from being adjacent to a jump that would
17441 be interpreted as an epilogue sequence by the unwinder. */
17442 if (JUMP_P(i) && CROSSING_JUMP_P (i))
17443 {
17444 seh_nop_p = true;
17445 break;
17446 }
17447
17448 /* If we get to another real insn, we don't need the nop. */
17449 if (INSN_P (i))
17450 break;
17451
17452 /* If we get to the epilogue note, prevent a catch region from
17453 being adjacent to the standard epilogue sequence. Note that,
17454 if non-call exceptions are enabled, we already did it during
17455 epilogue expansion, or else, if the insn can throw internally,
17456 we already did it during the reorg pass. */
17457 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
17458 && !flag_non_call_exceptions
17459 && !can_throw_internal (insn))
17460 {
17461 seh_nop_p = true;
17462 break;
17463 }
17464 }
17465
17466 /* If we didn't find a real insn following the call, prevent the
17467 unwinder from looking into the next function. */
17468 if (i == NULL)
17469 seh_nop_p = true;
17470 }
17471
17472 if (direct_p)
17473 {
17474 if (ix86_nopic_noplt_attribute_p (call_op))
17475 {
17476 direct_p = false;
17477 if (TARGET_64BIT)
17478 {
17479 if (output_indirect_p)
17480 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17481 else
17482 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17483 }
17484 else
17485 {
17486 if (output_indirect_p)
17487 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
17488 else
17489 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
17490 }
17491 }
17492 else
17493 xasm = "%!call\t%P0";
17494 }
17495 else
17496 {
17497 if (output_indirect_p)
17498 xasm = "%0";
17499 else
17500 xasm = "%!call\t%A0";
17501 }
17502
17503 if (output_indirect_p && !direct_p)
17504 ix86_output_indirect_branch (call_op, xasm, false);
17505 else
17506 output_asm_insn (xasm, &call_op);
17507
17508 if (seh_nop_p)
17509 return "nop";
17510
17511 return "";
17512}
17513
17514/* Return a MEM corresponding to a stack slot with mode MODE.
17515 Allocate a new slot if necessary.
17516
17517 The RTL for a function can have several slots available: N is
17518 which slot to use. */
17519
17520rtx
17521assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
17522{
17523 struct stack_local_entry *s;
17524
17525 gcc_assert (n < MAX_386_STACK_LOCALS);
17526
17527 for (s = ix86_stack_locals; s; s = s->next)
17528 if (s->mode == mode && s->n == n)
17529 return validize_mem (copy_rtx (s->rtl));
17530
17531 int align = 0;
17532 /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
17533 alignment with -m32 -mpreferred-stack-boundary=2. */
17534 if (mode == DImode
17535 && !TARGET_64BIT
17536 && n == SLOT_FLOATxFDI_387
17537 && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
17538 align = 32;
17539 s = ggc_alloc<stack_local_entry> ();
17540 s->n = n;
17541 s->mode = mode;
17542 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);
17543
17544 s->next = ix86_stack_locals;
17545 ix86_stack_locals = s;
17546 return validize_mem (copy_rtx (s->rtl));
17547}
17548
17549static void
17550ix86_instantiate_decls (void)
17551{
17552 struct stack_local_entry *s;
17553
17554 for (s = ix86_stack_locals; s; s = s->next)
17555 if (s->rtl != NULL_RTX)
17556 instantiate_decl_rtl (s->rtl);
17557}
17558
17559/* Check whether x86 address PARTS is a pc-relative address. */
17560
17561bool
17562ix86_rip_relative_addr_p (struct ix86_address *parts)
17563{
17564 rtx base, index, disp;
17565
17566 base = parts->base;
17567 index = parts->index;
17568 disp = parts->disp;
17569
17570 if (disp && !base && !index)
17571 {
17572 if (TARGET_64BIT)
17573 {
17574 rtx symbol = disp;
17575
17576 if (GET_CODE (disp) == CONST)
17577 symbol = XEXP (disp, 0);
17578 if (GET_CODE (symbol) == PLUS
17579 && CONST_INT_P (XEXP (symbol, 1)))
17580 symbol = XEXP (symbol, 0);
17581
17582 if (GET_CODE (symbol) == LABEL_REF
17583 || (GET_CODE (symbol) == SYMBOL_REF
17584 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
17585 || (GET_CODE (symbol) == UNSPEC
17586 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
17587 || XINT (symbol, 1) == UNSPEC_PCREL
17588 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
17589 return true;
17590 }
17591 }
17592 return false;
17593}
17594
17595/* Calculate the length of the memory address in the instruction encoding.
17596 Includes addr32 prefix, does not include the one-byte modrm, opcode,
17597 or other prefixes. We never generate addr32 prefix for LEA insn. */
17598
17599int
17600memory_address_length (rtx addr, bool lea)
17601{
17602 struct ix86_address parts;
17603 rtx base, index, disp;
17604 int len;
17605 int ok;
17606
17607 if (GET_CODE (addr) == PRE_DEC
17608 || GET_CODE (addr) == POST_INC
17609 || GET_CODE (addr) == PRE_MODIFY
17610 || GET_CODE (addr) == POST_MODIFY)
17611 return 0;
17612
17613 ok = ix86_decompose_address (addr, &parts);
17614 gcc_assert (ok);
17615
17616 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
17617
17618 /* If this is not LEA instruction, add the length of addr32 prefix. */
17619 if (TARGET_64BIT && !lea
17620 && (SImode_address_operand (addr, VOIDmode)
17621 || (parts.base && GET_MODE (parts.base) == SImode)
17622 || (parts.index && GET_MODE (parts.index) == SImode)))
17623 len++;
17624
17625 base = parts.base;
17626 index = parts.index;
17627 disp = parts.disp;
17628
17629 if (base && SUBREG_P (base))
17630 base = SUBREG_REG (base);
17631 if (index && SUBREG_P (index))
17632 index = SUBREG_REG (index);
17633
17634 gcc_assert (base == NULL_RTX || REG_P (base));
17635 gcc_assert (index == NULL_RTX || REG_P (index));
17636
17637 /* Rule of thumb:
17638 - esp as the base always wants an index,
17639 - ebp as the base always wants a displacement,
17640 - r12 as the base always wants an index,
17641 - r13 as the base always wants a displacement. */
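  /* For example, a plain "(%esp)" operand needs a SIB byte and "(%ebp)"
     needs an explicit zero displacement, so either one costs an extra byte
     compared with other base registers.  */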
17642
17643 /* Register Indirect. */
17644 if (base && !index && !disp)
17645 {
17646 /* esp (for its index) and ebp (for its displacement) need
17647 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
17648 code. */
17649 if (base == arg_pointer_rtx
17650 || base == frame_pointer_rtx
17651 || REGNO (base) == SP_REG
17652 || REGNO (base) == BP_REG
17653 || REGNO (base) == R12_REG
17654 || REGNO (base) == R13_REG)
17655 len++;
17656 }
17657
17658 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
17659 is not disp32, but disp32(%rip), so for disp32
17660 SIB byte is needed, unless print_operand_address
17661 optimizes it into disp32(%rip) or (%rip) is implied
17662 by UNSPEC. */
17663 else if (disp && !base && !index)
17664 {
17665 len += 4;
17666 if (!ix86_rip_relative_addr_p (&parts))
17667 len++;
17668 }
17669 else
17670 {
17671 /* Find the length of the displacement constant. */
17672 if (disp)
17673 {
17674 if (base && satisfies_constraint_K (disp))
17675 len += 1;
17676 else
17677 len += 4;
17678 }
17679 /* ebp always wants a displacement. Similarly r13. */
17680 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
17681 len++;
17682
17683 /* An index requires the two-byte modrm form.... */
17684 if (index
17685 /* ...like esp (or r12), which always wants an index. */
17686 || base == arg_pointer_rtx
17687 || base == frame_pointer_rtx
17688 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
17689 len++;
17690 }
17691
17692 return len;
17693}
17694
17695/* Compute default value for "length_immediate" attribute. When SHORTFORM
17696 is set, expect that the insn has an 8-bit immediate alternative. */
17697int
17698ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
17699{
17700 int len = 0;
17701 int i;
17702 extract_insn_cached (insn);
17703 for (i = recog_data.n_operands - 1; i >= 0; --i)
17704 if (CONSTANT_P (recog_data.operand[i]))
17705 {
17706 enum attr_mode mode = get_attr_mode (insn);
17707
17708 gcc_assert (!len);
17709 if (shortform && CONST_INT_P (recog_data.operand[i]))
17710 {
17711 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
17712 switch (mode)
17713 {
17714 case MODE_QI:
17715 len = 1;
17716 continue;
17717 case MODE_HI:
17718 ival = trunc_int_for_mode (ival, HImode);
17719 break;
17720 case MODE_SI:
17721 ival = trunc_int_for_mode (ival, SImode);
17722 break;
17723 default:
17724 break;
17725 }
17726 if (IN_RANGE (ival, -128, 127))
17727 {
17728 len = 1;
17729 continue;
17730 }
17731 }
17732 switch (mode)
17733 {
17734 case MODE_QI:
17735 len = 1;
17736 break;
17737 case MODE_HI:
17738 len = 2;
17739 break;
17740 case MODE_SI:
17741 len = 4;
17742 break;
17743 /* Immediates for DImode instructions are encoded
17744 as 32bit sign extended values. */
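	  /* E.g. "addq $-1, %rax" encodes its immediate in 4 bytes,
	     sign-extended to 64 bits; only movabs carries a full 8-byte
	     immediate.  */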
17745 case MODE_DI:
17746 len = 4;
17747 break;
17748 default:
17749 fatal_insn ("unknown insn mode", insn);
17750 }
17751 }
17752 return len;
17753}
17754
17755/* Compute default value for "length_address" attribute. */
17756int
17757ix86_attr_length_address_default (rtx_insn *insn)
17758{
17759 int i;
17760
17761 if (get_attr_type (insn) == TYPE_LEA)
17762 {
17763 rtx set = PATTERN (insn), addr;
17764
17765 if (GET_CODE (set) == PARALLEL)
17766 set = XVECEXP (set, 0, 0);
17767
17768 gcc_assert (GET_CODE (set) == SET);
17769
17770 addr = SET_SRC (set);
17771
17772 return memory_address_length (addr, true);
17773 }
17774
17775 extract_insn_cached (insn);
17776 for (i = recog_data.n_operands - 1; i >= 0; --i)
17777 {
17778 rtx op = recog_data.operand[i];
17779 if (MEM_P (op))
17780 {
17781 constrain_operands_cached (insn, reload_completed);
17782 if (which_alternative != -1)
17783 {
17784 const char *constraints = recog_data.constraints[i];
17785 int alt = which_alternative;
17786
17787 while (*constraints == '=' || *constraints == '+')
17788 constraints++;
17789 while (alt-- > 0)
17790 while (*constraints++ != ',')
17791 ;
17792 /* Skip ignored operands. */
17793 if (*constraints == 'X')
17794 continue;
17795 }
17796
17797 int len = memory_address_length (XEXP (op, 0), false);
17798
17799 /* Account for segment prefix for non-default addr spaces. */
17800 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
17801 len++;
17802
17803 return len;
17804 }
17805 }
17806 return 0;
17807}
17808
17809/* Compute default value for "length_vex" attribute. It includes
17810 2 or 3 byte VEX prefix and 1 opcode byte. */
17811
17812int
17813ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
17814 bool has_vex_w)
17815{
17816 int i, reg_only = 2 + 1;
17817 bool has_mem = false;
17818
17819 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
17820 byte VEX prefix. */
17821 if (!has_0f_opcode || has_vex_w)
17822 return 3 + 1;
17823
17824 /* We can always use 2 byte VEX prefix in 32bit. */
17825 if (!TARGET_64BIT)
17826 return 2 + 1;
17827
17828 extract_insn_cached (insn);
17829
17830 for (i = recog_data.n_operands - 1; i >= 0; --i)
17831 if (REG_P (recog_data.operand[i]))
17832 {
17833 /* The REX.W bit requires a 3-byte VEX prefix.
17834 REX2 registers with VEX use the extended EVEX prefix, which is 4 bytes. */
17835 if (GET_MODE (recog_data.operand[i]) == DImode
17836 && GENERAL_REG_P (recog_data.operand[i]))
17837 return 3 + 1;
17838
17839 /* The REX.B bit requires a 3-byte VEX prefix. Right here we don't know which
17840 operand will be encoded using VEX.B, so be conservative.
17841 REX2 registers with VEX use the extended EVEX prefix, which is 4 bytes. */
17842 if (REX_INT_REGNO_P (recog_data.operand[i])
17843 || REX2_INT_REGNO_P (recog_data.operand[i])
17844 || REX_SSE_REGNO_P (recog_data.operand[i]))
17845 reg_only = 3 + 1;
17846 }
17847 else if (MEM_P (recog_data.operand[i]))
17848 {
17849 /* REX2.X or REX2.B bits require the extended EVEX prefix, which is 4 bytes. */
17850 if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
17851 return 4;
17852
17853 /* REX.X or REX.B bits use 3 byte VEX prefix. */
17854 if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
17855 return 3 + 1;
17856
17857 has_mem = true;
17858 }
17859
17860 return has_mem ? 2 + 1 : reg_only;
17861}
17862
17863
17864static bool
17865ix86_class_likely_spilled_p (reg_class_t);
17866
17867 /* Return true if the lhs of INSN is a HW function argument register, and
17868 set *IS_SPILLED to true if it is a likely spilled HW register. */
17869static bool
17870insn_is_function_arg (rtx insn, bool* is_spilled)
17871{
17872 rtx dst;
17873
17874 if (!NONDEBUG_INSN_P (insn))
17875 return false;
17876 /* Call instructions are not movable; ignore them. */
17877 if (CALL_P (insn))
17878 return false;
17879 insn = PATTERN (insn);
17880 if (GET_CODE (insn) == PARALLEL)
17881 insn = XVECEXP (insn, 0, 0);
17882 if (GET_CODE (insn) != SET)
17883 return false;
17884 dst = SET_DEST (insn);
17885 if (REG_P (dst) && HARD_REGISTER_P (dst)
17886 && ix86_function_arg_regno_p (REGNO (dst)))
17887 {
17888 /* Is it likely spilled HW register? */
17889 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
17890 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
17891 *is_spilled = true;
17892 return true;
17893 }
17894 return false;
17895}
17896
17897 /* Add output dependencies for a chain of adjacent function arguments, but
17898 only if there is a move to a likely spilled HW register. Return the first
17899 argument if at least one dependence was added, or NULL otherwise. */
17900static rtx_insn *
17901add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
17902{
17903 rtx_insn *insn;
17904 rtx_insn *last = call;
17905 rtx_insn *first_arg = NULL;
17906 bool is_spilled = false;
17907
17908 head = PREV_INSN (head);
17909
17910 /* Find nearest to call argument passing instruction. */
17911 while (true)
17912 {
17913 last = PREV_INSN (last);
17914 if (last == head)
17915 return NULL;
17916 if (!NONDEBUG_INSN_P (last))
17917 continue;
17918 if (insn_is_function_arg (last, &is_spilled))
17919 break;
17920 return NULL;
17921 }
17922
17923 first_arg = last;
17924 while (true)
17925 {
17926 insn = PREV_INSN (last);
17927 if (!INSN_P (insn))
17928 break;
17929 if (insn == head)
17930 break;
17931 if (!NONDEBUG_INSN_P (insn))
17932 {
17933 last = insn;
17934 continue;
17935 }
17936 if (insn_is_function_arg (insn, &is_spilled))
17937 {
17938 /* Add an output dependence between two function arguments if the chain
17939 of output arguments contains likely spilled HW registers. */
17940 if (is_spilled)
17941 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
17942 first_arg = last = insn;
17943 }
17944 else
17945 break;
17946 }
17947 if (!is_spilled)
17948 return NULL;
17949 return first_arg;
17950}
17951
17952/* Add output or anti dependency from insn to first_arg to restrict its code
17953 motion. */
17954static void
17955avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
17956{
17957 rtx set;
17958 rtx tmp;
17959
17960 set = single_set (insn);
17961 if (!set)
17962 return;
17963 tmp = SET_DEST (set);
17964 if (REG_P (tmp))
17965 {
17966 /* Add output dependency to the first function argument. */
17967 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
17968 return;
17969 }
17970 /* Add anti dependency. */
17971 add_dependence (first_arg, insn, REG_DEP_ANTI);
17972}
17973
17974/* Avoid cross block motion of function argument through adding dependency
17975 from the first non-jump instruction in bb. */
17976static void
17977add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
17978{
17979 rtx_insn *insn = BB_END (bb);
17980
17981 while (insn)
17982 {
17983 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
17984 {
17985 rtx set = single_set (insn);
17986 if (set)
17987 {
17988 avoid_func_arg_motion (arg, insn);
17989 return;
17990 }
17991 }
17992 if (insn == BB_HEAD (bb))
17993 return;
17994 insn = PREV_INSN (insn);
17995 }
17996}
17997
17998/* Hook for pre-reload schedule - avoid motion of function arguments
17999 passed in likely spilled HW registers. */
18000static void
18001ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
18002{
18003 rtx_insn *insn;
18004 rtx_insn *first_arg = NULL;
18005 if (reload_completed)
18006 return;
18007 while (head != tail && DEBUG_INSN_P (head))
18008 head = NEXT_INSN (head);
18009 for (insn = tail; insn != head; insn = PREV_INSN (insn))
18010 if (INSN_P (insn) && CALL_P (insn))
18011 {
18012 first_arg = add_parameter_dependencies (insn, head);
18013 if (first_arg)
18014 {
18015 /* Add a dependee for the first argument to predecessors, but only
18016 if the region contains more than one block. */
18017 basic_block bb = BLOCK_FOR_INSN (insn);
18018 int rgn = CONTAINING_RGN (bb->index);
18019 int nr_blks = RGN_NR_BLOCKS (rgn);
18020 /* Skip trivial regions and region head blocks that can have
18021 predecessors outside of region. */
18022 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
18023 {
18024 edge e;
18025 edge_iterator ei;
18026
18027 /* Regions are SCCs with the exception of selective
18028 scheduling with pipelining of outer blocks enabled.
18029 So also check that immediate predecessors of a non-head
18030 block are in the same region. */
18031 FOR_EACH_EDGE (e, ei, bb->preds)
18032 {
18033 /* Avoid creating of loop-carried dependencies through
18034 using topological ordering in the region. */
18035 if (rgn == CONTAINING_RGN (e->src->index)
18036 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
18037 add_dependee_for_func_arg (first_arg, e->src);
18038 }
18039 }
18040 insn = first_arg;
18041 if (insn == head)
18042 break;
18043 }
18044 }
18045 else if (first_arg)
18046 avoid_func_arg_motion (first_arg, insn);
18047}
18048
18049/* Hook for pre-reload schedule - set priority of moves from likely spilled
18050 HW registers to maximum, to schedule them as soon as possible. These are
18051 moves from function argument registers at the top of the function entry
18052 and moves from function return value registers after call. */
18053static int
18054ix86_adjust_priority (rtx_insn *insn, int priority)
18055{
18056 rtx set;
18057
18058 if (reload_completed)
18059 return priority;
18060
18061 if (!NONDEBUG_INSN_P (insn))
18062 return priority;
18063
18064 set = single_set (insn);
18065 if (set)
18066 {
18067 rtx tmp = SET_SRC (set);
18068 if (REG_P (tmp)
18069 && HARD_REGISTER_P (tmp)
18070 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
18071 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
18072 return current_sched_info->sched_max_insns_priority;
18073 }
18074
18075 return priority;
18076}
18077
18078/* Prepare for scheduling pass. */
18079static void
18080ix86_sched_init_global (FILE *, int, int)
18081{
18082 /* Install scheduling hooks for current CPU. Some of these hooks are used
18083 in time-critical parts of the scheduler, so we only set them up when
18084 they are actually used. */
18085 switch (ix86_tune)
18086 {
18087 case PROCESSOR_CORE2:
18088 case PROCESSOR_NEHALEM:
18089 case PROCESSOR_SANDYBRIDGE:
18090 case PROCESSOR_HASWELL:
18091 case PROCESSOR_TREMONT:
18092 case PROCESSOR_ALDERLAKE:
18093 case PROCESSOR_GENERIC:
18094 /* Do not perform multipass scheduling for pre-reload schedule
18095 to save compile time. */
18096 if (reload_completed)
18097 {
18098 ix86_core2i7_init_hooks ();
18099 break;
18100 }
18101 /* Fall through. */
18102 default:
18103 targetm.sched.dfa_post_advance_cycle = NULL;
18104 targetm.sched.first_cycle_multipass_init = NULL;
18105 targetm.sched.first_cycle_multipass_begin = NULL;
18106 targetm.sched.first_cycle_multipass_issue = NULL;
18107 targetm.sched.first_cycle_multipass_backtrack = NULL;
18108 targetm.sched.first_cycle_multipass_end = NULL;
18109 targetm.sched.first_cycle_multipass_fini = NULL;
18110 break;
18111 }
18112}
18113
18114
18115/* Implement TARGET_STATIC_RTX_ALIGNMENT. */
18116
18117static HOST_WIDE_INT
18118ix86_static_rtx_alignment (machine_mode mode)
18119{
18120 if (mode == DFmode)
18121 return 64;
18122 if (ALIGN_MODE_128 (mode))
18123 return MAX (128, GET_MODE_ALIGNMENT (mode));
18124 return GET_MODE_ALIGNMENT (mode);
18125}
18126
18127/* Implement TARGET_CONSTANT_ALIGNMENT. */
18128
18129static HOST_WIDE_INT
18130ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
18131{
18132 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
18133 || TREE_CODE (exp) == INTEGER_CST)
18134 {
18135 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
18136 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
18137 return MAX (mode_align, align);
18138 }
18139 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
18140 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
18141 return BITS_PER_WORD;
18142
18143 return align;
18144}
18145
18146/* Implement TARGET_EMPTY_RECORD_P. */
18147
18148static bool
18149ix86_is_empty_record (const_tree type)
18150{
18151 if (!TARGET_64BIT)
18152 return false;
18153 return default_is_empty_record (type);
18154}
18155
18156/* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
18157
18158static void
18159ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
18160{
18161 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
18162
18163 if (!cum->warn_empty)
18164 return;
18165
18166 if (!TYPE_EMPTY_P (type))
18167 return;
18168
18169 /* Don't warn if the function isn't visible outside of the TU. */
18170 if (cum->decl && !TREE_PUBLIC (cum->decl))
18171 return;
18172
18173 tree decl = cum->decl;
18174 if (!decl)
18175 /* If we don't know the target, look at the current TU. */
18176 decl = current_function_decl;
18177
18178 const_tree ctx = get_ultimate_context (decl);
18179 if (ctx == NULL_TREE
18180 || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
18181 return;
18182
18183 /* If the actual size of the type is zero, then there is no change
18184 in how objects of this size are passed. */
18185 if (int_size_in_bytes (type) == 0)
18186 return;
18187
18188 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
18189 "changes in %<-fabi-version=12%> (GCC 8)", type);
18190
18191 /* Only warn once. */
18192 cum->warn_empty = false;
18193}
18194
18195/* This hook returns name of multilib ABI. */
18196
18197static const char *
18198ix86_get_multilib_abi_name (void)
18199{
18200 if (!(TARGET_64BIT_P (ix86_isa_flags)))
18201 return "i386";
18202 else if (TARGET_X32_P (ix86_isa_flags))
18203 return "x32";
18204 else
18205 return "x86_64";
18206}
18207
18208/* Compute the alignment for a variable for Intel MCU psABI. TYPE is
18209 the data type, and ALIGN is the alignment that the object would
18210 ordinarily have. */
18211
18212static int
18213iamcu_alignment (tree type, int align)
18214{
18215 machine_mode mode;
18216
18217 if (align < 32 || TYPE_USER_ALIGN (type))
18218 return align;
18219
18220 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
18221 bytes. */
18222 type = strip_array_types (type);
18223 if (TYPE_ATOMIC (type))
18224 return align;
18225
18226 mode = TYPE_MODE (type);
18227 switch (GET_MODE_CLASS (mode))
18228 {
18229 case MODE_INT:
18230 case MODE_COMPLEX_INT:
18231 case MODE_COMPLEX_FLOAT:
18232 case MODE_FLOAT:
18233 case MODE_DECIMAL_FLOAT:
18234 return 32;
18235 default:
18236 return align;
18237 }
18238}
18239
18240/* Compute the alignment for a static variable.
18241 TYPE is the data type, and ALIGN is the alignment that
18242 the object would ordinarily have. The value of this function is used
18243 instead of that alignment to align the object. */
18244
18245int
18246ix86_data_alignment (tree type, unsigned int align, bool opt)
18247{
18248 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
18249 for symbols from other compilation units or symbols that don't need
18250 to bind locally. In order to preserve some ABI compatibility with
18251 those compilers, ensure we don't decrease alignment from what we
18252 used to assume. */
18253
18254 unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
18255
18256 /* A data structure, equal or greater than the size of a cache line
18257 (64 bytes in the Pentium 4 and other recent Intel processors, including
18258 processors based on Intel Core microarchitecture) should be aligned
18259 so that its base address is a multiple of a cache line size. */
18260
18261 unsigned int max_align
18262 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
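  /* With the typical 64-byte prefetch block this caps MAX_ALIGN at
     512 bits (64 bytes).  */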
18263
18264 if (max_align < BITS_PER_WORD)
18265 max_align = BITS_PER_WORD;
18266
18267 switch (ix86_align_data_type)
18268 {
18269 case ix86_align_data_type_abi: opt = false; break;
18270 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
18271 case ix86_align_data_type_cacheline: break;
18272 }
18273
18274 if (TARGET_IAMCU)
18275 align = iamcu_alignment (type, align);
18276
18277 if (opt
18278 && AGGREGATE_TYPE_P (type)
18279 && TYPE_SIZE (type)
18280 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
18281 {
18282 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
18283 && align < max_align_compat)
18284 align = max_align_compat;
18285 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
18286 && align < max_align)
18287 align = max_align;
18288 }
18289
18290 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18291 to 16byte boundary. */
18292 if (TARGET_64BIT)
18293 {
18294 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
18295 && TYPE_SIZE (type)
18296 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18297 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18298 && align < 128)
18299 return 128;
18300 }
18301
18302 if (!opt)
18303 return align;
18304
18305 if (TREE_CODE (type) == ARRAY_TYPE)
18306 {
18307 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18308 return 64;
18309 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18310 return 128;
18311 }
18312 else if (TREE_CODE (type) == COMPLEX_TYPE)
18313 {
18314
18315 if (TYPE_MODE (type) == DCmode && align < 64)
18316 return 64;
18317 if ((TYPE_MODE (type) == XCmode
18318 || TYPE_MODE (type) == TCmode) && align < 128)
18319 return 128;
18320 }
18321 else if (RECORD_OR_UNION_TYPE_P (type)
18322 && TYPE_FIELDS (type))
18323 {
18324 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18325 return 64;
18326 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18327 return 128;
18328 }
18329 else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18330 || TREE_CODE (type) == INTEGER_TYPE)
18331 {
18332 if (TYPE_MODE (type) == DFmode && align < 64)
18333 return 64;
18334 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18335 return 128;
18336 }
18337
18338 return align;
18339}
18340
18341 /* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
18342static void
18343ix86_lower_local_decl_alignment (tree decl)
18344{
18345 unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
18346 DECL_ALIGN (decl), true);
18347 if (new_align < DECL_ALIGN (decl))
18348 SET_DECL_ALIGN (decl, new_align);
18349}
18350
18351/* Compute the alignment for a local variable or a stack slot. EXP is
18352 the data type or decl itself, MODE is the widest mode available and
18353 ALIGN is the alignment that the object would ordinarily have. The
18354 value of this macro is used instead of that alignment to align the
18355 object. */
18356
18357unsigned int
18358ix86_local_alignment (tree exp, machine_mode mode,
18359 unsigned int align, bool may_lower)
18360{
18361 tree type, decl;
18362
18363 if (exp && DECL_P (exp))
18364 {
18365 type = TREE_TYPE (exp);
18366 decl = exp;
18367 }
18368 else
18369 {
18370 type = exp;
18371 decl = NULL;
18372 }
18373
18374 /* Don't do dynamic stack realignment for long long objects with
18375 -mpreferred-stack-boundary=2. */
18376 if (may_lower
18377 && !TARGET_64BIT
18378 && align == 64
18379 && ix86_preferred_stack_boundary < 64
18380 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
18381 && (!type || (!TYPE_USER_ALIGN (type)
18382 && !TYPE_ATOMIC (strip_array_types (type))))
18383 && (!decl || !DECL_USER_ALIGN (decl)))
18384 align = 32;
18385
18386 /* If TYPE is NULL, we are allocating a stack slot for caller-save
18387 register in MODE. We will return the largest alignment of XF
18388 and DF. */
18389 if (!type)
18390 {
18391 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18392 align = GET_MODE_ALIGNMENT (DFmode);
18393 return align;
18394 }
18395
18396 /* Don't increase alignment for Intel MCU psABI. */
18397 if (TARGET_IAMCU)
18398 return align;
18399
18400 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18401 to 16byte boundary. Exact wording is:
18402
18403 An array uses the same alignment as its elements, except that a local or
18404 global array variable of length at least 16 bytes or
18405 a C99 variable-length array variable always has alignment of at least 16 bytes.
18406
18407 This was added to allow use of aligned SSE instructions on arrays. This
18408 rule is meant for static storage (where the compiler cannot do the analysis
18409 by itself). We follow it for automatic variables only when convenient.
18410 We fully control everything in the function compiled and functions from
18411 other unit cannot rely on the alignment.
18412
18413 Exclude va_list type. It is the common case of local array where
18414 we cannot benefit from the alignment.
18415
18416 TODO: Probably one should optimize for size only when var is not escaping. */
18417 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
18418 && TARGET_SSE)
18419 {
18420 if (AGGREGATE_TYPE_P (type)
18421 && (va_list_type_node == NULL_TREE
18422 || (TYPE_MAIN_VARIANT (type)
18423 != TYPE_MAIN_VARIANT (va_list_type_node)))
18424 && TYPE_SIZE (type)
18425 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18426 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18427 && align < 128)
18428 return 128;
18429 }
18430 if (TREE_CODE (type) == ARRAY_TYPE)
18431 {
18432 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18433 return 64;
18434 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18435 return 128;
18436 }
18437 else if (TREE_CODE (type) == COMPLEX_TYPE)
18438 {
18439 if (TYPE_MODE (type) == DCmode && align < 64)
18440 return 64;
18441 if ((TYPE_MODE (type) == XCmode
18442 || TYPE_MODE (type) == TCmode) && align < 128)
18443 return 128;
18444 }
18445 else if (RECORD_OR_UNION_TYPE_P (type)
18446 && TYPE_FIELDS (type))
18447 {
18448 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18449 return 64;
18450 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18451 return 128;
18452 }
18453 else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18454 || TREE_CODE (type) == INTEGER_TYPE)
18455 {
18456
18457 if (TYPE_MODE (type) == DFmode && align < 64)
18458 return 64;
18459 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18460 return 128;
18461 }
18462 return align;
18463}
18464
18465/* Compute the minimum required alignment for dynamic stack realignment
18466 purposes for a local variable, parameter or a stack slot. EXP is
18467 the data type or decl itself, MODE is its mode and ALIGN is the
18468 alignment that the object would ordinarily have. */
18469
18470unsigned int
18471ix86_minimum_alignment (tree exp, machine_mode mode,
18472 unsigned int align)
18473{
18474 tree type, decl;
18475
18476 if (exp && DECL_P (exp))
18477 {
18478 type = TREE_TYPE (exp);
18479 decl = exp;
18480 }
18481 else
18482 {
18483 type = exp;
18484 decl = NULL;
18485 }
18486
18487 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
18488 return align;
18489
18490 /* Don't do dynamic stack realignment for long long objects with
18491 -mpreferred-stack-boundary=2. */
18492 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
18493 && (!type || (!TYPE_USER_ALIGN (type)
18494 && !TYPE_ATOMIC (strip_array_types (type))))
18495 && (!decl || !DECL_USER_ALIGN (decl)))
18496 {
18497 gcc_checking_assert (!TARGET_STV);
18498 return 32;
18499 }
18500
18501 return align;
18502}
18503
18504/* Find a location for the static chain incoming to a nested function.
18505 This is a register, unless all free registers are used by arguments. */
18506
18507static rtx
18508ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
18509{
18510 unsigned regno;
18511
18512 if (TARGET_64BIT)
18513 {
18514 /* We always use R10 in 64-bit mode. */
18515 regno = R10_REG;
18516 }
18517 else
18518 {
18519 const_tree fntype, fndecl;
18520 unsigned int ccvt;
18521
18522 /* By default in 32-bit mode we use ECX to pass the static chain. */
18523 regno = CX_REG;
18524
18525 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
18526 {
18527 fntype = TREE_TYPE (fndecl_or_type);
18528 fndecl = fndecl_or_type;
18529 }
18530 else
18531 {
18532 fntype = fndecl_or_type;
18533 fndecl = NULL;
18534 }
18535
18536 ccvt = ix86_get_callcvt (fntype);
18537 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
18538 {
18539 /* Fastcall functions use ecx/edx for arguments, which leaves
18540 us with EAX for the static chain.
18541 Thiscall functions use ecx for arguments, which also
18542 leaves us with EAX for the static chain. */
18543 regno = AX_REG;
18544 }
18545 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
18546 {
18547 /* Thiscall functions use ecx for arguments, which leaves
18548 us with EAX and EDX for the static chain.
18549 For ABI compatibility we use EAX. */
18550 regno = AX_REG;
18551 }
18552 else if (ix86_function_regparm (fntype, fndecl) == 3)
18553 {
18554 /* For regparm 3, we have no free call-clobbered registers in
18555 which to store the static chain. In order to implement this,
18556 we have the trampoline push the static chain to the stack.
18557 However, we can't push a value below the return address when
18558 we call the nested function directly, so we have to use an
18559 alternate entry point. For this we use ESI, and have the
18560 alternate entry point push ESI, so that things appear the
18561 same once we're executing the nested function. */
18562 if (incoming_p)
18563 {
18564 if (fndecl == current_function_decl
18565 && !ix86_static_chain_on_stack)
18566 {
18567 gcc_assert (!reload_completed);
18568 ix86_static_chain_on_stack = true;
18569 }
18570 return gen_frame_mem (SImode,
18571 plus_constant (Pmode,
18572 arg_pointer_rtx, -8));
18573 }
18574 regno = SI_REG;
18575 }
18576 }
18577
18578 return gen_rtx_REG (Pmode, regno);
18579}
18580
18581/* Emit RTL insns to initialize the variable parts of a trampoline.
18582 FNDECL is the decl of the target address; M_TRAMP is a MEM for
18583 the trampoline, and CHAIN_VALUE is an RTX for the static chain
18584 to be passed to the target function. */
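/* A rough sketch (assuming no ENDBR is inserted and FNADDR does not fit a
   zero-extended 32-bit constant) of the bytes the 64-bit trampoline ends
   up holding:
       49 bb <imm64>	movabs $FNADDR, %r11
       49 ba <imm64>	movabs $CHAIN,  %r10
       49 ff e3 90	jmp *%r11; nop
   and the 32-bit variant:
       b8/b9/68 <imm32>	mov $CHAIN, %eax/%ecx  or  push $CHAIN
       e9 <rel32>	jmp FNADDR  */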
18585
18586static void
18587ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
18588{
18589 rtx mem, fnaddr;
18590 int opcode;
18591 int offset = 0;
18592 bool need_endbr = (flag_cf_protection & CF_BRANCH);
18593
18594 fnaddr = XEXP (DECL_RTL (fndecl), 0);
18595
18596 if (TARGET_64BIT)
18597 {
18598 int size;
18599
18600 if (need_endbr)
18601 {
18602 /* Insert ENDBR64. */
18603 mem = adjust_address (m_tramp, SImode, offset);
18604 emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
18605 offset += 4;
18606 }
18607
18608 /* Load the function address to r11. Try to load address using
18609 the shorter movl instead of movabs. We may want to support
18610 movq for kernel mode, but kernel does not use trampolines at
18611 the moment. FNADDR is a 32bit address and may not be in
18612 DImode when ptr_mode == SImode. Always use movl in this
18613 case. */
18614 if (ptr_mode == SImode
18615 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18616 {
18617 fnaddr = copy_addr_to_reg (fnaddr);
18618
18619 mem = adjust_address (m_tramp, HImode, offset);
18620 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
18621
18622 mem = adjust_address (m_tramp, SImode, offset + 2);
18623 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
18624 offset += 6;
18625 }
18626 else
18627 {
18628 mem = adjust_address (m_tramp, HImode, offset);
18629 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
18630
18631 mem = adjust_address (m_tramp, DImode, offset + 2);
18632 emit_move_insn (mem, fnaddr);
18633 offset += 10;
18634 }
18635
18636 /* Load static chain using movabs to r10. Use the shorter movl
18637 instead of movabs when ptr_mode == SImode. */
18638 if (ptr_mode == SImode)
18639 {
18640 opcode = 0xba41;
18641 size = 6;
18642 }
18643 else
18644 {
18645 opcode = 0xba49;
18646 size = 10;
18647 }
18648
18649 mem = adjust_address (m_tramp, HImode, offset);
18650 emit_move_insn (mem, gen_int_mode (opcode, HImode));
18651
18652 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
18653 emit_move_insn (mem, chain_value);
18654 offset += size;
18655
18656 /* Jump to r11; the last (unused) byte is a nop, only there to
18657 pad the write out to a single 32-bit store. */
18658 mem = adjust_address (m_tramp, SImode, offset);
18659 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
18660 offset += 4;
18661 }
18662 else
18663 {
18664 rtx disp, chain;
18665
18666 /* Depending on the static chain location, either load a register
18667 with a constant, or push the constant to the stack. All of the
18668 instructions are the same size. */
      chain = ix86_static_chain (fndecl, true);
18670 if (REG_P (chain))
18671 {
18672 switch (REGNO (chain))
18673 {
18674 case AX_REG:
18675 opcode = 0xb8; break;
18676 case CX_REG:
18677 opcode = 0xb9; break;
18678 default:
18679 gcc_unreachable ();
18680 }
18681 }
18682 else
18683 opcode = 0x68;
18684
18685 if (need_endbr)
18686 {
18687 /* Insert ENDBR32. */
18688 mem = adjust_address (m_tramp, SImode, offset);
18689 emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
18690 offset += 4;
18691 }
18692
18693 mem = adjust_address (m_tramp, QImode, offset);
18694 emit_move_insn (mem, gen_int_mode (opcode, QImode));
18695
18696 mem = adjust_address (m_tramp, SImode, offset + 1);
18697 emit_move_insn (mem, chain_value);
18698 offset += 5;
18699
18700 mem = adjust_address (m_tramp, QImode, offset);
18701 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
18702
18703 mem = adjust_address (m_tramp, SImode, offset + 1);
18704
18705 /* Compute offset from the end of the jmp to the target function.
18706 In the case in which the trampoline stores the static chain on
18707 the stack, we need to skip the first insn which pushes the
18708 (call-saved) register static chain; this push is 1 byte. */
18709 offset += 5;
18710 int skip = MEM_P (chain) ? 1 : 0;
18711 /* Skip ENDBR32 at the entry of the target function. */
18712 if (need_endbr
	  && !cgraph_node::get (fndecl)->only_called_directly_p ())
18714 skip += 4;
18715 disp = expand_binop (SImode, sub_optab, fnaddr,
18716 plus_constant (Pmode, XEXP (m_tramp, 0),
18717 offset - skip),
18718 NULL_RTX, 1, OPTAB_DIRECT);
18719 emit_move_insn (mem, disp);
18720 }
18721
18722 gcc_assert (offset <= TRAMPOLINE_SIZE);
18723
18724#ifdef HAVE_ENABLE_EXECUTE_STACK
18725#ifdef CHECK_EXECUTE_STACK_ENABLED
18726 if (CHECK_EXECUTE_STACK_ENABLED)
18727#endif
18728 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
18729 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
18730#endif
18731}
18732
18733static bool
18734ix86_allocate_stack_slots_for_args (void)
18735{
18736 /* Naked functions should not allocate stack slots for arguments. */
  return !ix86_function_naked (current_function_decl);
18738}
18739
18740static bool
18741ix86_warn_func_return (tree decl)
18742{
18743 /* Naked functions are implemented entirely in assembly, including the
18744 return sequence, so suppress warnings about this. */
  return !ix86_function_naked (decl);
18746}
18747
18748/* Return the shift count of a vector by scalar shift builtin second argument
18749 ARG1. */
18750static tree
18751ix86_vector_shift_count (tree arg1)
18752{
18753 if (tree_fits_uhwi_p (arg1))
18754 return arg1;
18755 else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
18756 {
18757 /* The count argument is weird, passed in as various 128-bit
18758 (or 64-bit) vectors, the low 64 bits from it are the count. */
18759 unsigned char buf[16];
18760 int len = native_encode_expr (arg1, buf, 16);
18761 if (len == 0)
18762 return NULL_TREE;
18763 tree t = native_interpret_expr (uint64_type_node, buf, len);
18764 if (t && tree_fits_uhwi_p (t))
18765 return t;
18766 }
18767 return NULL_TREE;
18768}
18769
/* Return true if ARG_MASK is all ones, where ELEMS is the number of
   elements of the corresponding vector.  */
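/* For instance (hypothetical values), with ELEMS == 4 a mask of 0xf or
   0xff is treated as all ones, while 0x7 is not.  */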
18772static bool
18773ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
18774{
18775 if (TREE_CODE (arg_mask) != INTEGER_CST)
18776 return false;
18777
18778 unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
18779 if (elems == HOST_BITS_PER_WIDE_INT)
18780 return mask == HOST_WIDE_INT_M1U;
18781 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
18782 return false;
18783
18784 return true;
18785}
18786
18787static tree
18788ix86_fold_builtin (tree fndecl, int n_args,
18789 tree *args, bool ignore ATTRIBUTE_UNUSED)
18790{
18791 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
18792 {
18793 enum ix86_builtins fn_code
	= (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
18795 enum rtx_code rcode;
18796 bool is_vshift;
18797 enum tree_code tcode;
18798 bool is_scalar;
18799 unsigned HOST_WIDE_INT mask;
18800
18801 switch (fn_code)
18802 {
18803 case IX86_BUILTIN_CPU_IS:
18804 case IX86_BUILTIN_CPU_SUPPORTS:
18805 gcc_assert (n_args == 1);
18806 return fold_builtin_cpu (fndecl, args);
18807
18808 case IX86_BUILTIN_NANQ:
18809 case IX86_BUILTIN_NANSQ:
18810 {
18811 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18812 const char *str = c_getstr (*args);
18813 int quiet = fn_code == IX86_BUILTIN_NANQ;
18814 REAL_VALUE_TYPE real;
18815
18816 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
18817 return build_real (type, real);
18818 return NULL_TREE;
18819 }
18820
18821 case IX86_BUILTIN_INFQ:
18822 case IX86_BUILTIN_HUGE_VALQ:
18823 {
18824 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18825 REAL_VALUE_TYPE inf;
18826 real_inf (&inf);
18827 return build_real (type, inf);
18828 }
18829
18830 case IX86_BUILTIN_TZCNT16:
18831 case IX86_BUILTIN_CTZS:
18832 case IX86_BUILTIN_TZCNT32:
18833 case IX86_BUILTIN_TZCNT64:
18834 gcc_assert (n_args == 1);
18835 if (TREE_CODE (args[0]) == INTEGER_CST)
18836 {
18837 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18838 tree arg = args[0];
18839 if (fn_code == IX86_BUILTIN_TZCNT16
18840 || fn_code == IX86_BUILTIN_CTZS)
18841 arg = fold_convert (short_unsigned_type_node, arg);
18842 if (integer_zerop (arg))
18843 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
18844 else
18845 return fold_const_call (CFN_CTZ, type, arg);
18846 }
18847 break;
18848
18849 case IX86_BUILTIN_LZCNT16:
18850 case IX86_BUILTIN_CLZS:
18851 case IX86_BUILTIN_LZCNT32:
18852 case IX86_BUILTIN_LZCNT64:
18853 gcc_assert (n_args == 1);
18854 if (TREE_CODE (args[0]) == INTEGER_CST)
18855 {
18856 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18857 tree arg = args[0];
18858 if (fn_code == IX86_BUILTIN_LZCNT16
18859 || fn_code == IX86_BUILTIN_CLZS)
18860 arg = fold_convert (short_unsigned_type_node, arg);
18861 if (integer_zerop (arg))
18862 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
18863 else
18864 return fold_const_call (CFN_CLZ, type, arg);
18865 }
18866 break;
18867
18868 case IX86_BUILTIN_BEXTR32:
18869 case IX86_BUILTIN_BEXTR64:
18870 case IX86_BUILTIN_BEXTRI32:
18871 case IX86_BUILTIN_BEXTRI64:
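	  /* The second operand encodes the start bit in its low byte and
	     the field length in the next byte; e.g. (hypothetical values)
	     a control of 0x0804 applied to 0x12345678 extracts 8 bits
	     starting at bit 4, giving 0x67.  */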
18872 gcc_assert (n_args == 2);
18873 if (tree_fits_uhwi_p (args[1]))
18874 {
18875 unsigned HOST_WIDE_INT res = 0;
18876 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
18877 unsigned int start = tree_to_uhwi (args[1]);
18878 unsigned int len = (start & 0xff00) >> 8;
18879 tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
18880 start &= 0xff;
18881 if (start >= prec || len == 0)
18882 return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
18883 args[0]);
18884 else if (!tree_fits_uhwi_p (args[0]))
18885 break;
18886 else
18887 res = tree_to_uhwi (args[0]) >> start;
18888 if (len > prec)
18889 len = prec;
18890 if (len < HOST_BITS_PER_WIDE_INT)
18891 res &= (HOST_WIDE_INT_1U << len) - 1;
	    return build_int_cstu (lhs_type, res);
18893 }
18894 break;
18895
18896 case IX86_BUILTIN_BZHI32:
18897 case IX86_BUILTIN_BZHI64:
18898 gcc_assert (n_args == 2);
18899 if (tree_fits_uhwi_p (args[1]))
18900 {
18901 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
18902 tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
18903 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
18904 return args[0];
18905 if (idx == 0)
18906 return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
18907 args[0]);
18908 if (!tree_fits_uhwi_p (args[0]))
18909 break;
18910 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
18911 res &= ~(HOST_WIDE_INT_M1U << idx);
	      return build_int_cstu (lhs_type, res);
18913 }
18914 break;
18915
18916 case IX86_BUILTIN_PDEP32:
18917 case IX86_BUILTIN_PDEP64:
18918 gcc_assert (n_args == 2);
18919 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
18920 {
18921 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
18922 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
18923 unsigned HOST_WIDE_INT res = 0;
18924 unsigned HOST_WIDE_INT m, k = 1;
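	      /* Deposit successive low bits of SRC into the set bit
		 positions of MASK; e.g. (hypothetical values) src 0b101
		 with mask 0b11010 yields 0b10010.  */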
18925 for (m = 1; m; m <<= 1)
18926 if ((mask & m) != 0)
18927 {
18928 if ((src & k) != 0)
18929 res |= m;
18930 k <<= 1;
18931 }
18932 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18933 }
18934 break;
18935
18936 case IX86_BUILTIN_PEXT32:
18937 case IX86_BUILTIN_PEXT64:
18938 gcc_assert (n_args == 2);
18939 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
18940 {
18941 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
18942 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
18943 unsigned HOST_WIDE_INT res = 0;
18944 unsigned HOST_WIDE_INT m, k = 1;
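	      /* Gather the bits of SRC selected by MASK into the low bits
		 of the result; e.g. (hypothetical values) src 0b10010 with
		 mask 0b11010 yields 0b101.  */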
18945 for (m = 1; m; m <<= 1)
18946 if ((mask & m) != 0)
18947 {
18948 if ((src & m) != 0)
18949 res |= k;
18950 k <<= 1;
18951 }
18952 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18953 }
18954 break;
18955
18956 case IX86_BUILTIN_MOVMSKPS:
18957 case IX86_BUILTIN_PMOVMSKB:
18958 case IX86_BUILTIN_MOVMSKPD:
18959 case IX86_BUILTIN_PMOVMSKB128:
18960 case IX86_BUILTIN_MOVMSKPD256:
18961 case IX86_BUILTIN_MOVMSKPS256:
18962 case IX86_BUILTIN_PMOVMSKB256:
18963 gcc_assert (n_args == 1);
18964 if (TREE_CODE (args[0]) == VECTOR_CST)
18965 {
18966 HOST_WIDE_INT res = 0;
18967 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
18968 {
18969 tree e = VECTOR_CST_ELT (args[0], i);
18970 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
18971 {
		  if (wi::neg_p (wi::to_wide (e)))
18973 res |= HOST_WIDE_INT_1 << i;
18974 }
18975 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
18976 {
18977 if (TREE_REAL_CST (e).sign)
18978 res |= HOST_WIDE_INT_1 << i;
18979 }
18980 else
18981 return NULL_TREE;
18982 }
18983 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
18984 }
18985 break;
18986
18987 case IX86_BUILTIN_PSLLD:
18988 case IX86_BUILTIN_PSLLD128:
18989 case IX86_BUILTIN_PSLLD128_MASK:
18990 case IX86_BUILTIN_PSLLD256:
18991 case IX86_BUILTIN_PSLLD256_MASK:
18992 case IX86_BUILTIN_PSLLD512:
18993 case IX86_BUILTIN_PSLLDI:
18994 case IX86_BUILTIN_PSLLDI128:
18995 case IX86_BUILTIN_PSLLDI128_MASK:
18996 case IX86_BUILTIN_PSLLDI256:
18997 case IX86_BUILTIN_PSLLDI256_MASK:
18998 case IX86_BUILTIN_PSLLDI512:
18999 case IX86_BUILTIN_PSLLQ:
19000 case IX86_BUILTIN_PSLLQ128:
19001 case IX86_BUILTIN_PSLLQ128_MASK:
19002 case IX86_BUILTIN_PSLLQ256:
19003 case IX86_BUILTIN_PSLLQ256_MASK:
19004 case IX86_BUILTIN_PSLLQ512:
19005 case IX86_BUILTIN_PSLLQI:
19006 case IX86_BUILTIN_PSLLQI128:
19007 case IX86_BUILTIN_PSLLQI128_MASK:
19008 case IX86_BUILTIN_PSLLQI256:
19009 case IX86_BUILTIN_PSLLQI256_MASK:
19010 case IX86_BUILTIN_PSLLQI512:
19011 case IX86_BUILTIN_PSLLW:
19012 case IX86_BUILTIN_PSLLW128:
19013 case IX86_BUILTIN_PSLLW128_MASK:
19014 case IX86_BUILTIN_PSLLW256:
19015 case IX86_BUILTIN_PSLLW256_MASK:
19016 case IX86_BUILTIN_PSLLW512_MASK:
19017 case IX86_BUILTIN_PSLLWI:
19018 case IX86_BUILTIN_PSLLWI128:
19019 case IX86_BUILTIN_PSLLWI128_MASK:
19020 case IX86_BUILTIN_PSLLWI256:
19021 case IX86_BUILTIN_PSLLWI256_MASK:
19022 case IX86_BUILTIN_PSLLWI512_MASK:
19023 rcode = ASHIFT;
19024 is_vshift = false;
19025 goto do_shift;
19026 case IX86_BUILTIN_PSRAD:
19027 case IX86_BUILTIN_PSRAD128:
19028 case IX86_BUILTIN_PSRAD128_MASK:
19029 case IX86_BUILTIN_PSRAD256:
19030 case IX86_BUILTIN_PSRAD256_MASK:
19031 case IX86_BUILTIN_PSRAD512:
19032 case IX86_BUILTIN_PSRADI:
19033 case IX86_BUILTIN_PSRADI128:
19034 case IX86_BUILTIN_PSRADI128_MASK:
19035 case IX86_BUILTIN_PSRADI256:
19036 case IX86_BUILTIN_PSRADI256_MASK:
19037 case IX86_BUILTIN_PSRADI512:
19038 case IX86_BUILTIN_PSRAQ128_MASK:
19039 case IX86_BUILTIN_PSRAQ256_MASK:
19040 case IX86_BUILTIN_PSRAQ512:
19041 case IX86_BUILTIN_PSRAQI128_MASK:
19042 case IX86_BUILTIN_PSRAQI256_MASK:
19043 case IX86_BUILTIN_PSRAQI512:
19044 case IX86_BUILTIN_PSRAW:
19045 case IX86_BUILTIN_PSRAW128:
19046 case IX86_BUILTIN_PSRAW128_MASK:
19047 case IX86_BUILTIN_PSRAW256:
19048 case IX86_BUILTIN_PSRAW256_MASK:
19049 case IX86_BUILTIN_PSRAW512:
19050 case IX86_BUILTIN_PSRAWI:
19051 case IX86_BUILTIN_PSRAWI128:
19052 case IX86_BUILTIN_PSRAWI128_MASK:
19053 case IX86_BUILTIN_PSRAWI256:
19054 case IX86_BUILTIN_PSRAWI256_MASK:
19055 case IX86_BUILTIN_PSRAWI512:
19056 rcode = ASHIFTRT;
19057 is_vshift = false;
19058 goto do_shift;
19059 case IX86_BUILTIN_PSRLD:
19060 case IX86_BUILTIN_PSRLD128:
19061 case IX86_BUILTIN_PSRLD128_MASK:
19062 case IX86_BUILTIN_PSRLD256:
19063 case IX86_BUILTIN_PSRLD256_MASK:
19064 case IX86_BUILTIN_PSRLD512:
19065 case IX86_BUILTIN_PSRLDI:
19066 case IX86_BUILTIN_PSRLDI128:
19067 case IX86_BUILTIN_PSRLDI128_MASK:
19068 case IX86_BUILTIN_PSRLDI256:
19069 case IX86_BUILTIN_PSRLDI256_MASK:
19070 case IX86_BUILTIN_PSRLDI512:
19071 case IX86_BUILTIN_PSRLQ:
19072 case IX86_BUILTIN_PSRLQ128:
19073 case IX86_BUILTIN_PSRLQ128_MASK:
19074 case IX86_BUILTIN_PSRLQ256:
19075 case IX86_BUILTIN_PSRLQ256_MASK:
19076 case IX86_BUILTIN_PSRLQ512:
19077 case IX86_BUILTIN_PSRLQI:
19078 case IX86_BUILTIN_PSRLQI128:
19079 case IX86_BUILTIN_PSRLQI128_MASK:
19080 case IX86_BUILTIN_PSRLQI256:
19081 case IX86_BUILTIN_PSRLQI256_MASK:
19082 case IX86_BUILTIN_PSRLQI512:
19083 case IX86_BUILTIN_PSRLW:
19084 case IX86_BUILTIN_PSRLW128:
19085 case IX86_BUILTIN_PSRLW128_MASK:
19086 case IX86_BUILTIN_PSRLW256:
19087 case IX86_BUILTIN_PSRLW256_MASK:
19088 case IX86_BUILTIN_PSRLW512:
19089 case IX86_BUILTIN_PSRLWI:
19090 case IX86_BUILTIN_PSRLWI128:
19091 case IX86_BUILTIN_PSRLWI128_MASK:
19092 case IX86_BUILTIN_PSRLWI256:
19093 case IX86_BUILTIN_PSRLWI256_MASK:
19094 case IX86_BUILTIN_PSRLWI512:
19095 rcode = LSHIFTRT;
19096 is_vshift = false;
19097 goto do_shift;
19098 case IX86_BUILTIN_PSLLVV16HI:
19099 case IX86_BUILTIN_PSLLVV16SI:
19100 case IX86_BUILTIN_PSLLVV2DI:
19101 case IX86_BUILTIN_PSLLVV2DI_MASK:
19102 case IX86_BUILTIN_PSLLVV32HI:
19103 case IX86_BUILTIN_PSLLVV4DI:
19104 case IX86_BUILTIN_PSLLVV4DI_MASK:
19105 case IX86_BUILTIN_PSLLVV4SI:
19106 case IX86_BUILTIN_PSLLVV4SI_MASK:
19107 case IX86_BUILTIN_PSLLVV8DI:
19108 case IX86_BUILTIN_PSLLVV8HI:
19109 case IX86_BUILTIN_PSLLVV8SI:
19110 case IX86_BUILTIN_PSLLVV8SI_MASK:
19111 rcode = ASHIFT;
19112 is_vshift = true;
19113 goto do_shift;
19114 case IX86_BUILTIN_PSRAVQ128:
19115 case IX86_BUILTIN_PSRAVQ256:
19116 case IX86_BUILTIN_PSRAVV16HI:
19117 case IX86_BUILTIN_PSRAVV16SI:
19118 case IX86_BUILTIN_PSRAVV32HI:
19119 case IX86_BUILTIN_PSRAVV4SI:
19120 case IX86_BUILTIN_PSRAVV4SI_MASK:
19121 case IX86_BUILTIN_PSRAVV8DI:
19122 case IX86_BUILTIN_PSRAVV8HI:
19123 case IX86_BUILTIN_PSRAVV8SI:
19124 case IX86_BUILTIN_PSRAVV8SI_MASK:
19125 rcode = ASHIFTRT;
19126 is_vshift = true;
19127 goto do_shift;
19128 case IX86_BUILTIN_PSRLVV16HI:
19129 case IX86_BUILTIN_PSRLVV16SI:
19130 case IX86_BUILTIN_PSRLVV2DI:
19131 case IX86_BUILTIN_PSRLVV2DI_MASK:
19132 case IX86_BUILTIN_PSRLVV32HI:
19133 case IX86_BUILTIN_PSRLVV4DI:
19134 case IX86_BUILTIN_PSRLVV4DI_MASK:
19135 case IX86_BUILTIN_PSRLVV4SI:
19136 case IX86_BUILTIN_PSRLVV4SI_MASK:
19137 case IX86_BUILTIN_PSRLVV8DI:
19138 case IX86_BUILTIN_PSRLVV8HI:
19139 case IX86_BUILTIN_PSRLVV8SI:
19140 case IX86_BUILTIN_PSRLVV8SI_MASK:
19141 rcode = LSHIFTRT;
19142 is_vshift = true;
19143 goto do_shift;
19144
19145 do_shift:
19146 gcc_assert (n_args >= 2);
19147 if (TREE_CODE (args[0]) != VECTOR_CST)
19148 break;
19149 mask = HOST_WIDE_INT_M1U;
19150 if (n_args > 2)
19151 {
	      /* This is a masked shift.  */
19153 if (!tree_fits_uhwi_p (args[n_args - 1])
19154 || TREE_SIDE_EFFECTS (args[n_args - 2]))
19155 break;
19156 mask = tree_to_uhwi (args[n_args - 1]);
19157 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19158 mask |= HOST_WIDE_INT_M1U << elems;
19159 if (mask != HOST_WIDE_INT_M1U
19160 && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
19161 break;
19162 if (mask == (HOST_WIDE_INT_M1U << elems))
19163 return args[n_args - 2];
19164 }
19165 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
19166 break;
19167 if (tree tem = (is_vshift ? integer_one_node
			  : ix86_vector_shift_count (args[1])))
19169 {
19170 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
19171 unsigned HOST_WIDE_INT prec
19172 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
19173 if (count == 0 && mask == HOST_WIDE_INT_M1U)
19174 return args[0];
19175 if (count >= prec)
19176 {
19177 if (rcode == ASHIFTRT)
19178 count = prec - 1;
19179 else if (mask == HOST_WIDE_INT_M1U)
19180 return build_zero_cst (TREE_TYPE (args[0]));
19181 }
19182 tree countt = NULL_TREE;
19183 if (!is_vshift)
19184 {
19185 if (count >= prec)
19186 countt = integer_zero_node;
19187 else
19188 countt = build_int_cst (integer_type_node, count);
19189 }
19190 tree_vector_builder builder;
19191 if (mask != HOST_WIDE_INT_M1U || is_vshift)
	      builder.new_vector (TREE_TYPE (args[0]),
				  TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
				  1);
	    else
	      builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
					   false);
19198 unsigned int cnt = builder.encoded_nelts ();
19199 for (unsigned int i = 0; i < cnt; ++i)
19200 {
19201 tree elt = VECTOR_CST_ELT (args[0], i);
19202 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
19203 return NULL_TREE;
19204 tree type = TREE_TYPE (elt);
19205 if (rcode == LSHIFTRT)
19206 elt = fold_convert (unsigned_type_for (type), elt);
19207 if (is_vshift)
19208 {
19209 countt = VECTOR_CST_ELT (args[1], i);
19210 if (TREE_CODE (countt) != INTEGER_CST
19211 || TREE_OVERFLOW (countt))
19212 return NULL_TREE;
		      if (wi::neg_p (wi::to_wide (countt))
			  || wi::to_widest (countt) >= prec)
19215 {
19216 if (rcode == ASHIFTRT)
19217 countt = build_int_cst (TREE_TYPE (countt),
19218 prec - 1);
19219 else
19220 {
19221 elt = build_zero_cst (TREE_TYPE (elt));
19222 countt = build_zero_cst (TREE_TYPE (countt));
19223 }
19224 }
19225 }
19226 else if (count >= prec)
19227 elt = build_zero_cst (TREE_TYPE (elt));
19228 elt = const_binop (rcode == ASHIFT
19229 ? LSHIFT_EXPR : RSHIFT_EXPR,
19230 TREE_TYPE (elt), elt, countt);
19231 if (!elt || TREE_CODE (elt) != INTEGER_CST)
19232 return NULL_TREE;
19233 if (rcode == LSHIFTRT)
19234 elt = fold_convert (type, elt);
19235 if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
19236 {
19237 elt = VECTOR_CST_ELT (args[n_args - 2], i);
19238 if (TREE_CODE (elt) != INTEGER_CST
19239 || TREE_OVERFLOW (elt))
19240 return NULL_TREE;
19241 }
	      builder.quick_push (elt);
19243 }
19244 return builder.build ();
19245 }
19246 break;
19247
19248 case IX86_BUILTIN_MINSS:
19249 case IX86_BUILTIN_MINSH_MASK:
19250 tcode = LT_EXPR;
19251 is_scalar = true;
19252 goto do_minmax;
19253
19254 case IX86_BUILTIN_MAXSS:
19255 case IX86_BUILTIN_MAXSH_MASK:
19256 tcode = GT_EXPR;
19257 is_scalar = true;
19258 goto do_minmax;
19259
19260 case IX86_BUILTIN_MINPS:
19261 case IX86_BUILTIN_MINPD:
19262 case IX86_BUILTIN_MINPS256:
19263 case IX86_BUILTIN_MINPD256:
19264 case IX86_BUILTIN_MINPS512:
19265 case IX86_BUILTIN_MINPD512:
19266 case IX86_BUILTIN_MINPS128_MASK:
19267 case IX86_BUILTIN_MINPD128_MASK:
19268 case IX86_BUILTIN_MINPS256_MASK:
19269 case IX86_BUILTIN_MINPD256_MASK:
19270 case IX86_BUILTIN_MINPH128_MASK:
19271 case IX86_BUILTIN_MINPH256_MASK:
19272 case IX86_BUILTIN_MINPH512_MASK:
19273 tcode = LT_EXPR;
19274 is_scalar = false;
19275 goto do_minmax;
19276
19277 case IX86_BUILTIN_MAXPS:
19278 case IX86_BUILTIN_MAXPD:
19279 case IX86_BUILTIN_MAXPS256:
19280 case IX86_BUILTIN_MAXPD256:
19281 case IX86_BUILTIN_MAXPS512:
19282 case IX86_BUILTIN_MAXPD512:
19283 case IX86_BUILTIN_MAXPS128_MASK:
19284 case IX86_BUILTIN_MAXPD128_MASK:
19285 case IX86_BUILTIN_MAXPS256_MASK:
19286 case IX86_BUILTIN_MAXPD256_MASK:
19287 case IX86_BUILTIN_MAXPH128_MASK:
19288 case IX86_BUILTIN_MAXPH256_MASK:
19289 case IX86_BUILTIN_MAXPH512_MASK:
19290 tcode = GT_EXPR;
19291 is_scalar = false;
19292 do_minmax:
19293 gcc_assert (n_args >= 2);
19294 if (TREE_CODE (args[0]) != VECTOR_CST
19295 || TREE_CODE (args[1]) != VECTOR_CST)
19296 break;
19297 mask = HOST_WIDE_INT_M1U;
19298 if (n_args > 2)
19299 {
19300 gcc_assert (n_args >= 4);
	      /* This is a masked minmax.  */
19302 if (TREE_CODE (args[3]) != INTEGER_CST
19303 || TREE_SIDE_EFFECTS (args[2]))
19304 break;
19305 mask = TREE_INT_CST_LOW (args[3]);
19306 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19307 mask |= HOST_WIDE_INT_M1U << elems;
19308 if (mask != HOST_WIDE_INT_M1U
19309 && TREE_CODE (args[2]) != VECTOR_CST)
19310 break;
19311 if (n_args >= 5)
19312 {
19313 if (!tree_fits_uhwi_p (args[4]))
19314 break;
19315 if (tree_to_uhwi (args[4]) != 4
19316 && tree_to_uhwi (args[4]) != 8)
19317 break;
19318 }
19319 if (mask == (HOST_WIDE_INT_M1U << elems))
19320 return args[2];
19321 }
19322 /* Punt on NaNs, unless exceptions are disabled. */
19323 if (HONOR_NANS (args[0])
19324 && (n_args < 5 || tree_to_uhwi (args[4]) != 8))
19325 for (int i = 0; i < 2; ++i)
19326 {
	      unsigned count = vector_cst_encoded_nelts (args[i]);
19328 for (unsigned j = 0; j < count; ++j)
19329 if (tree_expr_nan_p (VECTOR_CST_ENCODED_ELT (args[i], j)))
19330 return NULL_TREE;
19331 }
19332 {
19333 tree res = const_binop (tcode,
19334 truth_type_for (TREE_TYPE (args[0])),
19335 args[0], args[1]);
19336 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19337 break;
19338 res = fold_ternary (VEC_COND_EXPR, TREE_TYPE (args[0]), res,
19339 args[0], args[1]);
19340 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19341 break;
19342 if (mask != HOST_WIDE_INT_M1U)
19343 {
19344 unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19345 vec_perm_builder sel (nelts, nelts, 1);
19346 for (unsigned int i = 0; i < nelts; i++)
19347 if (mask & (HOST_WIDE_INT_1U << i))
		  sel.quick_push (i);
		else
		  sel.quick_push (nelts + i);
19351 vec_perm_indices indices (sel, 2, nelts);
19352 res = fold_vec_perm (TREE_TYPE (args[0]), res, args[2],
19353 indices);
19354 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19355 break;
19356 }
19357 if (is_scalar)
19358 {
19359 unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19360 vec_perm_builder sel (nelts, nelts, 1);
	      sel.quick_push (0);
	      for (unsigned int i = 1; i < nelts; i++)
		sel.quick_push (nelts + i);
19364 vec_perm_indices indices (sel, 2, nelts);
19365 res = fold_vec_perm (TREE_TYPE (args[0]), res, args[0],
19366 indices);
19367 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19368 break;
19369 }
19370 return res;
19371 }
19372
19373 default:
19374 break;
19375 }
19376 }
19377
19378#ifdef SUBTARGET_FOLD_BUILTIN
19379 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
19380#endif
19381
19382 return NULL_TREE;
19383}
19384
19385/* Fold a MD builtin (use ix86_fold_builtin for folding into
19386 constant) in GIMPLE. */
19387
19388bool
19389ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
19390{
  gimple *stmt = gsi_stmt (*gsi), *g;
  gimple_seq stmts = NULL;
  tree fndecl = gimple_call_fndecl (stmt);
  gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
  int n_args = gimple_call_num_args (stmt);
  enum ix86_builtins fn_code
    = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
19398 tree decl = NULL_TREE;
19399 tree arg0, arg1, arg2;
19400 enum rtx_code rcode;
19401 enum tree_code tcode;
19402 unsigned HOST_WIDE_INT count;
19403 bool is_vshift;
19404 unsigned HOST_WIDE_INT elems;
19405 location_t loc;
19406
  /* Don't fold when there is an ISA mismatch.  */
19408 if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
19409 return false;
19410
19411 switch (fn_code)
19412 {
19413 case IX86_BUILTIN_TZCNT32:
      decl = builtin_decl_implicit (BUILT_IN_CTZ);
      goto fold_tzcnt_lzcnt;

    case IX86_BUILTIN_TZCNT64:
      decl = builtin_decl_implicit (BUILT_IN_CTZLL);
      goto fold_tzcnt_lzcnt;

    case IX86_BUILTIN_LZCNT32:
      decl = builtin_decl_implicit (BUILT_IN_CLZ);
      goto fold_tzcnt_lzcnt;

    case IX86_BUILTIN_LZCNT64:
      decl = builtin_decl_implicit (BUILT_IN_CLZLL);
19427 goto fold_tzcnt_lzcnt;
19428
19429 fold_tzcnt_lzcnt:
19430 gcc_assert (n_args == 1);
      arg0 = gimple_call_arg (stmt, 0);
      if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
	{
	  int prec = TYPE_PRECISION (TREE_TYPE (arg0));
	  /* If arg0 is provably non-zero, optimize into the generic
	     __builtin_c[tl]z{,ll} functions the middle-end handles
	     better.  */
	  if (!expr_not_equal_to (arg0, wi::zero (prec)))
	    return false;

	  loc = gimple_location (stmt);
	  g = gimple_build_call (decl, 1, arg0);
	  gimple_set_location (g, loc);
	  tree lhs = make_ssa_name (integer_type_node);
	  gimple_call_set_lhs (g, lhs);
	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
	  g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
	  gimple_set_location (g, loc);
19449 gsi_replace (gsi, g, false);
19450 return true;
19451 }
19452 break;
19453
19454 case IX86_BUILTIN_BZHI32:
19455 case IX86_BUILTIN_BZHI64:
19456 gcc_assert (n_args == 2);
      arg1 = gimple_call_arg (stmt, 1);
      if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
	{
	  unsigned int idx = tree_to_uhwi (arg1) & 0xff;
	  arg0 = gimple_call_arg (stmt, 0);
	  if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
	    break;
	  loc = gimple_location (stmt);
	  g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
	  gimple_set_location (g, loc);
19467 gsi_replace (gsi, g, false);
19468 return true;
19469 }
19470 break;
19471
19472 case IX86_BUILTIN_PDEP32:
19473 case IX86_BUILTIN_PDEP64:
19474 case IX86_BUILTIN_PEXT32:
19475 case IX86_BUILTIN_PEXT64:
19476 gcc_assert (n_args == 2);
      arg1 = gimple_call_arg (stmt, 1);
      if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
	{
	  loc = gimple_location (stmt);
	  arg0 = gimple_call_arg (stmt, 0);
	  g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
	  gimple_set_location (g, loc);
19484 gsi_replace (gsi, g, false);
19485 return true;
19486 }
19487 break;
19488
19489 case IX86_BUILTIN_PBLENDVB256:
19490 case IX86_BUILTIN_BLENDVPS256:
19491 case IX86_BUILTIN_BLENDVPD256:
      /* The 256-bit pcmpeqb/d/q instructions require AVX2; without AVX2
	 the comparison is vec-lowered to scalar operations and not
	 combined back.  */
19494 if (!TARGET_AVX2)
19495 break;
19496
19497 /* FALLTHRU. */
19498 case IX86_BUILTIN_BLENDVPD:
      /* blendvpd is available with SSE4.1 but pcmpgtq requires SSE4.2;
	 without SSE4.2 the comparison is vec-lowered to scalar operations
	 and not combined back.  */
19502 if (!TARGET_SSE4_2)
19503 break;
19504 /* FALLTHRU. */
19505 case IX86_BUILTIN_PBLENDVB128:
19506 case IX86_BUILTIN_BLENDVPS:
19507 gcc_assert (n_args == 3);
      arg0 = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);
      arg2 = gimple_call_arg (stmt, 2);
      if (gimple_call_lhs (stmt))
	{
	  loc = gimple_location (stmt);
	  tree type = TREE_TYPE (arg2);
	  if (VECTOR_FLOAT_TYPE_P (type))
	    {
	      tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
		? intSI_type_node : intDI_type_node;
	      type = get_same_sized_vectype (itype, type);
	    }
	  else
	    type = signed_type_for (type);
	  arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
	  tree zero_vec = build_zero_cst (type);
	  tree cmp_type = truth_type_for (type);
	  tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
	  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
	  g = gimple_build_assign (gimple_call_lhs (stmt),
				   VEC_COND_EXPR, cmp,
				   arg1, arg0);
	  gimple_set_location (g, loc);
19532 gsi_replace (gsi, g, false);
19533 }
19534 else
19535 gsi_replace (gsi, gimple_build_nop (), false);
19536 return true;
19537
19538
19539 case IX86_BUILTIN_PCMPEQB128:
19540 case IX86_BUILTIN_PCMPEQW128:
19541 case IX86_BUILTIN_PCMPEQD128:
19542 case IX86_BUILTIN_PCMPEQQ:
19543 case IX86_BUILTIN_PCMPEQB256:
19544 case IX86_BUILTIN_PCMPEQW256:
19545 case IX86_BUILTIN_PCMPEQD256:
19546 case IX86_BUILTIN_PCMPEQQ256:
19547 tcode = EQ_EXPR;
19548 goto do_cmp;
19549
19550 case IX86_BUILTIN_PCMPGTB128:
19551 case IX86_BUILTIN_PCMPGTW128:
19552 case IX86_BUILTIN_PCMPGTD128:
19553 case IX86_BUILTIN_PCMPGTQ:
19554 case IX86_BUILTIN_PCMPGTB256:
19555 case IX86_BUILTIN_PCMPGTW256:
19556 case IX86_BUILTIN_PCMPGTD256:
19557 case IX86_BUILTIN_PCMPGTQ256:
19558 tcode = GT_EXPR;
19559
19560 do_cmp:
19561 gcc_assert (n_args == 2);
      arg0 = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);
      if (gimple_call_lhs (stmt))
	{
	  loc = gimple_location (stmt);
	  tree type = TREE_TYPE (arg0);
	  tree zero_vec = build_zero_cst (type);
	  tree minus_one_vec = build_minus_one_cst (type);
	  tree cmp_type = truth_type_for (type);
	  tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
	  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
	  g = gimple_build_assign (gimple_call_lhs (stmt),
				   VEC_COND_EXPR, cmp,
				   minus_one_vec, zero_vec);
	  gimple_set_location (g, loc);
19577 gsi_replace (gsi, g, false);
19578 }
19579 else
19580 gsi_replace (gsi, gimple_build_nop (), false);
19581 return true;
19582
19583 case IX86_BUILTIN_PSLLD:
19584 case IX86_BUILTIN_PSLLD128:
19585 case IX86_BUILTIN_PSLLD128_MASK:
19586 case IX86_BUILTIN_PSLLD256:
19587 case IX86_BUILTIN_PSLLD256_MASK:
19588 case IX86_BUILTIN_PSLLD512:
19589 case IX86_BUILTIN_PSLLDI:
19590 case IX86_BUILTIN_PSLLDI128:
19591 case IX86_BUILTIN_PSLLDI128_MASK:
19592 case IX86_BUILTIN_PSLLDI256:
19593 case IX86_BUILTIN_PSLLDI256_MASK:
19594 case IX86_BUILTIN_PSLLDI512:
19595 case IX86_BUILTIN_PSLLQ:
19596 case IX86_BUILTIN_PSLLQ128:
19597 case IX86_BUILTIN_PSLLQ128_MASK:
19598 case IX86_BUILTIN_PSLLQ256:
19599 case IX86_BUILTIN_PSLLQ256_MASK:
19600 case IX86_BUILTIN_PSLLQ512:
19601 case IX86_BUILTIN_PSLLQI:
19602 case IX86_BUILTIN_PSLLQI128:
19603 case IX86_BUILTIN_PSLLQI128_MASK:
19604 case IX86_BUILTIN_PSLLQI256:
19605 case IX86_BUILTIN_PSLLQI256_MASK:
19606 case IX86_BUILTIN_PSLLQI512:
19607 case IX86_BUILTIN_PSLLW:
19608 case IX86_BUILTIN_PSLLW128:
19609 case IX86_BUILTIN_PSLLW128_MASK:
19610 case IX86_BUILTIN_PSLLW256:
19611 case IX86_BUILTIN_PSLLW256_MASK:
19612 case IX86_BUILTIN_PSLLW512_MASK:
19613 case IX86_BUILTIN_PSLLWI:
19614 case IX86_BUILTIN_PSLLWI128:
19615 case IX86_BUILTIN_PSLLWI128_MASK:
19616 case IX86_BUILTIN_PSLLWI256:
19617 case IX86_BUILTIN_PSLLWI256_MASK:
19618 case IX86_BUILTIN_PSLLWI512_MASK:
19619 rcode = ASHIFT;
19620 is_vshift = false;
19621 goto do_shift;
19622 case IX86_BUILTIN_PSRAD:
19623 case IX86_BUILTIN_PSRAD128:
19624 case IX86_BUILTIN_PSRAD128_MASK:
19625 case IX86_BUILTIN_PSRAD256:
19626 case IX86_BUILTIN_PSRAD256_MASK:
19627 case IX86_BUILTIN_PSRAD512:
19628 case IX86_BUILTIN_PSRADI:
19629 case IX86_BUILTIN_PSRADI128:
19630 case IX86_BUILTIN_PSRADI128_MASK:
19631 case IX86_BUILTIN_PSRADI256:
19632 case IX86_BUILTIN_PSRADI256_MASK:
19633 case IX86_BUILTIN_PSRADI512:
19634 case IX86_BUILTIN_PSRAQ128_MASK:
19635 case IX86_BUILTIN_PSRAQ256_MASK:
19636 case IX86_BUILTIN_PSRAQ512:
19637 case IX86_BUILTIN_PSRAQI128_MASK:
19638 case IX86_BUILTIN_PSRAQI256_MASK:
19639 case IX86_BUILTIN_PSRAQI512:
19640 case IX86_BUILTIN_PSRAW:
19641 case IX86_BUILTIN_PSRAW128:
19642 case IX86_BUILTIN_PSRAW128_MASK:
19643 case IX86_BUILTIN_PSRAW256:
19644 case IX86_BUILTIN_PSRAW256_MASK:
19645 case IX86_BUILTIN_PSRAW512:
19646 case IX86_BUILTIN_PSRAWI:
19647 case IX86_BUILTIN_PSRAWI128:
19648 case IX86_BUILTIN_PSRAWI128_MASK:
19649 case IX86_BUILTIN_PSRAWI256:
19650 case IX86_BUILTIN_PSRAWI256_MASK:
19651 case IX86_BUILTIN_PSRAWI512:
19652 rcode = ASHIFTRT;
19653 is_vshift = false;
19654 goto do_shift;
19655 case IX86_BUILTIN_PSRLD:
19656 case IX86_BUILTIN_PSRLD128:
19657 case IX86_BUILTIN_PSRLD128_MASK:
19658 case IX86_BUILTIN_PSRLD256:
19659 case IX86_BUILTIN_PSRLD256_MASK:
19660 case IX86_BUILTIN_PSRLD512:
19661 case IX86_BUILTIN_PSRLDI:
19662 case IX86_BUILTIN_PSRLDI128:
19663 case IX86_BUILTIN_PSRLDI128_MASK:
19664 case IX86_BUILTIN_PSRLDI256:
19665 case IX86_BUILTIN_PSRLDI256_MASK:
19666 case IX86_BUILTIN_PSRLDI512:
19667 case IX86_BUILTIN_PSRLQ:
19668 case IX86_BUILTIN_PSRLQ128:
19669 case IX86_BUILTIN_PSRLQ128_MASK:
19670 case IX86_BUILTIN_PSRLQ256:
19671 case IX86_BUILTIN_PSRLQ256_MASK:
19672 case IX86_BUILTIN_PSRLQ512:
19673 case IX86_BUILTIN_PSRLQI:
19674 case IX86_BUILTIN_PSRLQI128:
19675 case IX86_BUILTIN_PSRLQI128_MASK:
19676 case IX86_BUILTIN_PSRLQI256:
19677 case IX86_BUILTIN_PSRLQI256_MASK:
19678 case IX86_BUILTIN_PSRLQI512:
19679 case IX86_BUILTIN_PSRLW:
19680 case IX86_BUILTIN_PSRLW128:
19681 case IX86_BUILTIN_PSRLW128_MASK:
19682 case IX86_BUILTIN_PSRLW256:
19683 case IX86_BUILTIN_PSRLW256_MASK:
19684 case IX86_BUILTIN_PSRLW512:
19685 case IX86_BUILTIN_PSRLWI:
19686 case IX86_BUILTIN_PSRLWI128:
19687 case IX86_BUILTIN_PSRLWI128_MASK:
19688 case IX86_BUILTIN_PSRLWI256:
19689 case IX86_BUILTIN_PSRLWI256_MASK:
19690 case IX86_BUILTIN_PSRLWI512:
19691 rcode = LSHIFTRT;
19692 is_vshift = false;
19693 goto do_shift;
19694 case IX86_BUILTIN_PSLLVV16HI:
19695 case IX86_BUILTIN_PSLLVV16SI:
19696 case IX86_BUILTIN_PSLLVV2DI:
19697 case IX86_BUILTIN_PSLLVV2DI_MASK:
19698 case IX86_BUILTIN_PSLLVV32HI:
19699 case IX86_BUILTIN_PSLLVV4DI:
19700 case IX86_BUILTIN_PSLLVV4DI_MASK:
19701 case IX86_BUILTIN_PSLLVV4SI:
19702 case IX86_BUILTIN_PSLLVV4SI_MASK:
19703 case IX86_BUILTIN_PSLLVV8DI:
19704 case IX86_BUILTIN_PSLLVV8HI:
19705 case IX86_BUILTIN_PSLLVV8SI:
19706 case IX86_BUILTIN_PSLLVV8SI_MASK:
19707 rcode = ASHIFT;
19708 is_vshift = true;
19709 goto do_shift;
19710 case IX86_BUILTIN_PSRAVQ128:
19711 case IX86_BUILTIN_PSRAVQ256:
19712 case IX86_BUILTIN_PSRAVV16HI:
19713 case IX86_BUILTIN_PSRAVV16SI:
19714 case IX86_BUILTIN_PSRAVV32HI:
19715 case IX86_BUILTIN_PSRAVV4SI:
19716 case IX86_BUILTIN_PSRAVV4SI_MASK:
19717 case IX86_BUILTIN_PSRAVV8DI:
19718 case IX86_BUILTIN_PSRAVV8HI:
19719 case IX86_BUILTIN_PSRAVV8SI:
19720 case IX86_BUILTIN_PSRAVV8SI_MASK:
19721 rcode = ASHIFTRT;
19722 is_vshift = true;
19723 goto do_shift;
19724 case IX86_BUILTIN_PSRLVV16HI:
19725 case IX86_BUILTIN_PSRLVV16SI:
19726 case IX86_BUILTIN_PSRLVV2DI:
19727 case IX86_BUILTIN_PSRLVV2DI_MASK:
19728 case IX86_BUILTIN_PSRLVV32HI:
19729 case IX86_BUILTIN_PSRLVV4DI:
19730 case IX86_BUILTIN_PSRLVV4DI_MASK:
19731 case IX86_BUILTIN_PSRLVV4SI:
19732 case IX86_BUILTIN_PSRLVV4SI_MASK:
19733 case IX86_BUILTIN_PSRLVV8DI:
19734 case IX86_BUILTIN_PSRLVV8HI:
19735 case IX86_BUILTIN_PSRLVV8SI:
19736 case IX86_BUILTIN_PSRLVV8SI_MASK:
19737 rcode = LSHIFTRT;
19738 is_vshift = true;
19739 goto do_shift;
19740
19741 do_shift:
19742 gcc_assert (n_args >= 2);
      if (!gimple_call_lhs (stmt))
	{
	  gsi_replace (gsi, gimple_build_nop (), false);
	  return true;
	}
      arg0 = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);
      elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
      /* For masked shift, only optimize if the mask is all ones. */
      if (n_args > 2
	  && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
19754 break;
19755 if (is_vshift)
19756 {
19757 if (TREE_CODE (arg1) != VECTOR_CST)
19758 break;
19759 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
19760 if (integer_zerop (arg1))
19761 count = 0;
19762 else if (rcode == ASHIFTRT)
19763 break;
19764 else
19765 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
19766 {
19767 tree elt = VECTOR_CST_ELT (arg1, i);
		if (!wi::neg_p (wi::to_wide (elt))
		    && wi::to_widest (elt) < count)
19770 return false;
19771 }
19772 }
19773 else
19774 {
19775 arg1 = ix86_vector_shift_count (arg1);
19776 if (!arg1)
19777 break;
19778 count = tree_to_uhwi (arg1);
19779 }
19780 if (count == 0)
19781 {
19782 /* Just return the first argument for shift by 0. */
	  loc = gimple_location (stmt);
	  g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
	  gimple_set_location (g, loc);
19786 gsi_replace (gsi, g, false);
19787 return true;
19788 }
19789 if (rcode != ASHIFTRT
19790 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
19791 {
	  /* For shift counts greater than or equal to the precision, the
	     result is zero, except for arithmetic right shifts.  */
	  loc = gimple_location (stmt);
	  g = gimple_build_assign (gimple_call_lhs (stmt),
				   build_zero_cst (TREE_TYPE (arg0)));
	  gimple_set_location (g, loc);
19798 gsi_replace (gsi, g, false);
19799 return true;
19800 }
19801 break;
19802
19803 case IX86_BUILTIN_SHUFPD512:
19804 case IX86_BUILTIN_SHUFPS512:
19805 case IX86_BUILTIN_SHUFPD:
19806 case IX86_BUILTIN_SHUFPD256:
19807 case IX86_BUILTIN_SHUFPS:
19808 case IX86_BUILTIN_SHUFPS256:
      arg0 = gimple_call_arg (stmt, 0);
      elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
      /* This is a masked shuffle.  Only optimize if the mask is all ones.  */
      if (n_args > 3
	  && !ix86_masked_all_ones (elems,
				    gimple_call_arg (stmt, n_args - 1)))
	break;
      arg2 = gimple_call_arg (stmt, 2);
      if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
19818 {
19819 unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
19820 /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */
19821 if (shuffle_mask > 255)
19822 return false;
19823
19824 machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
	  loc = gimple_location (stmt);
19826 tree itype = (imode == E_DFmode
19827 ? long_long_integer_type_node : integer_type_node);
19828 tree vtype = build_vector_type (itype, elems);
19829 tree_vector_builder elts (vtype, elems, 1);
19830
19831
	  /* Transform the integer shuffle_mask into the vector perm_mask
	     used by vec_perm_expr; refer to shufp[sd]256/512 in sse.md.  */
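	  /* As a worked example (hypothetical operands): 128-bit shufpd with
	     shuffle_mask == 1 builds perm_mask { 1, 2 }, i.e. element 1 of
	     arg0 followed by element 0 of arg1.  */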
19834 for (unsigned i = 0; i != elems; i++)
19835 {
19836 unsigned sel_idx;
	      /* Imm[1:0] (if VL > 128, also Imm[3:2], Imm[5:4], Imm[7:6])
		 provides 2 select controls for each element of the
		 destination.  */
19840 if (imode == E_DFmode)
19841 sel_idx = (i & 1) * elems + (i & ~1)
19842 + ((shuffle_mask >> i) & 1);
19843 else
19844 {
		  /* Imm[7:0] (if VL > 128, also Imm[7:0]) provides 4 select
		     controls for each element of the destination.  */
19847 unsigned j = i % 4;
19848 sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
19849 + ((shuffle_mask >> 2 * j) & 3);
19850 }
	      elts.quick_push (build_int_cst (itype, sel_idx));
19852 }
19853
19854 tree perm_mask = elts.build ();
	  arg1 = gimple_call_arg (stmt, 1);
	  g = gimple_build_assign (gimple_call_lhs (stmt),
				   VEC_PERM_EXPR,
				   arg0, arg1, perm_mask);
	  gimple_set_location (g, loc);
19860 gsi_replace (gsi, g, false);
19861 return true;
19862 }
19863 // Do not error yet, the constant could be propagated later?
19864 break;
19865
19866 case IX86_BUILTIN_PABSB:
19867 case IX86_BUILTIN_PABSW:
19868 case IX86_BUILTIN_PABSD:
19869 /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
19870 if (!TARGET_MMX_WITH_SSE)
19871 break;
19872 /* FALLTHRU. */
19873 case IX86_BUILTIN_PABSB128:
19874 case IX86_BUILTIN_PABSB256:
19875 case IX86_BUILTIN_PABSB512:
19876 case IX86_BUILTIN_PABSW128:
19877 case IX86_BUILTIN_PABSW256:
19878 case IX86_BUILTIN_PABSW512:
19879 case IX86_BUILTIN_PABSD128:
19880 case IX86_BUILTIN_PABSD256:
19881 case IX86_BUILTIN_PABSD512:
19882 case IX86_BUILTIN_PABSQ128:
19883 case IX86_BUILTIN_PABSQ256:
19884 case IX86_BUILTIN_PABSQ512:
19885 case IX86_BUILTIN_PABSB128_MASK:
19886 case IX86_BUILTIN_PABSB256_MASK:
19887 case IX86_BUILTIN_PABSW128_MASK:
19888 case IX86_BUILTIN_PABSW256_MASK:
19889 case IX86_BUILTIN_PABSD128_MASK:
19890 case IX86_BUILTIN_PABSD256_MASK:
19891 gcc_assert (n_args >= 1);
      if (!gimple_call_lhs (stmt))
19893 {
19894 gsi_replace (gsi, gimple_build_nop (), false);
19895 return true;
19896 }
      arg0 = gimple_call_arg (stmt, 0);
      elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
      /* For masked ABS, only optimize if the mask is all ones. */
      if (n_args > 1
	  && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
19902 break;
19903 {
19904 tree utype, ures, vce;
19905 utype = unsigned_type_for (TREE_TYPE (arg0));
	/* PABSB/W/D/Q store the unsigned result in dst; use ABSU_EXPR
	   instead of ABS_EXPR to handle the overflow case (TYPE_MIN).  */
	ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
	loc = gimple_location (stmt);
	vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
	g = gimple_build_assign (gimple_call_lhs (stmt),
19913 VIEW_CONVERT_EXPR, vce);
19914 gsi_replace (gsi, g, false);
19915 }
19916 return true;
19917
19918 case IX86_BUILTIN_MINPS:
19919 case IX86_BUILTIN_MINPD:
19920 case IX86_BUILTIN_MINPS256:
19921 case IX86_BUILTIN_MINPD256:
19922 case IX86_BUILTIN_MINPS512:
19923 case IX86_BUILTIN_MINPD512:
19924 case IX86_BUILTIN_MINPS128_MASK:
19925 case IX86_BUILTIN_MINPD128_MASK:
19926 case IX86_BUILTIN_MINPS256_MASK:
19927 case IX86_BUILTIN_MINPD256_MASK:
19928 case IX86_BUILTIN_MINPH128_MASK:
19929 case IX86_BUILTIN_MINPH256_MASK:
19930 case IX86_BUILTIN_MINPH512_MASK:
19931 tcode = LT_EXPR;
19932 goto do_minmax;
19933
19934 case IX86_BUILTIN_MAXPS:
19935 case IX86_BUILTIN_MAXPD:
19936 case IX86_BUILTIN_MAXPS256:
19937 case IX86_BUILTIN_MAXPD256:
19938 case IX86_BUILTIN_MAXPS512:
19939 case IX86_BUILTIN_MAXPD512:
19940 case IX86_BUILTIN_MAXPS128_MASK:
19941 case IX86_BUILTIN_MAXPD128_MASK:
19942 case IX86_BUILTIN_MAXPS256_MASK:
19943 case IX86_BUILTIN_MAXPD256_MASK:
19944 case IX86_BUILTIN_MAXPH128_MASK:
19945 case IX86_BUILTIN_MAXPH256_MASK:
19946 case IX86_BUILTIN_MAXPH512_MASK:
19947 tcode = GT_EXPR;
19948 do_minmax:
19949 gcc_assert (n_args >= 2);
19950 /* Without SSE4.1 we often aren't able to pattern match it back to the
19951 desired instruction. */
      if (!gimple_call_lhs (stmt) || !optimize || !TARGET_SSE4_1)
	break;
      arg0 = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);
      elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
      /* For masked minmax, only optimize if the mask is all ones. */
      if (n_args > 2
	  && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, 3)))
	break;
      if (n_args >= 5)
	{
	  tree arg4 = gimple_call_arg (stmt, 4);
19964 if (!tree_fits_uhwi_p (arg4))
19965 break;
19966 if (tree_to_uhwi (arg4) == 4)
19967 /* Ok. */;
19968 else if (tree_to_uhwi (arg4) != 8)
19969 /* Invalid round argument. */
19970 break;
19971 else if (HONOR_NANS (arg0))
19972 /* Lowering to comparison would raise exceptions which
19973 shouldn't be raised. */
19974 break;
19975 }
19976 {
19977 tree type = truth_type_for (TREE_TYPE (arg0));
	tree cmpres = gimple_build (&stmts, tcode, type, arg0, arg1);
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
	g = gimple_build_assign (gimple_call_lhs (stmt),
19981 VEC_COND_EXPR, cmpres, arg0, arg1);
19982 gsi_replace (gsi, g, false);
19983 }
19984 return true;
19985
19986 default:
19987 break;
19988 }
19989
19990 return false;
19991}
19992
19993/* Handler for an SVML-style interface to
19994 a library with vectorized intrinsics. */
19995
19996tree
19997ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
19998{
19999 char name[20];
20000 tree fntype, new_fndecl, args;
20001 unsigned arity;
20002 const char *bname;
20003 machine_mode el_mode, in_mode;
20004 int n, in_n;
20005
  /* SVML is suitable for unsafe math only.  */
20007 if (!flag_unsafe_math_optimizations)
20008 return NULL_TREE;
20009
20010 el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
20014 if (el_mode != in_mode
20015 || n != in_n)
20016 return NULL_TREE;
20017
20018 switch (fn)
20019 {
20020 CASE_CFN_EXP:
20021 CASE_CFN_LOG:
20022 CASE_CFN_LOG10:
20023 CASE_CFN_POW:
20024 CASE_CFN_TANH:
20025 CASE_CFN_TAN:
20026 CASE_CFN_ATAN:
20027 CASE_CFN_ATAN2:
20028 CASE_CFN_ATANH:
20029 CASE_CFN_CBRT:
20030 CASE_CFN_SINH:
20031 CASE_CFN_SIN:
20032 CASE_CFN_ASINH:
20033 CASE_CFN_ASIN:
20034 CASE_CFN_COSH:
20035 CASE_CFN_COS:
20036 CASE_CFN_ACOSH:
20037 CASE_CFN_ACOS:
20038 if ((el_mode != DFmode || n != 2)
20039 && (el_mode != SFmode || n != 4))
20040 return NULL_TREE;
20041 break;
20042
20043 default:
20044 return NULL_TREE;
20045 }
20046
20047 tree fndecl = mathfn_built_in (el_mode == DFmode
20048 ? double_type_node : float_type_node, fn);
20049 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
20050
  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);
20062
20063 /* Convert to uppercase. */
20064 name[4] &= ~0x20;
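  /* For example, sinf on a 4-element float vector should end up as
     "vmlsSin4" and sin on a 2-element double vector as "vmldSin2".  */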
20065
20066 arity = 0;
20067 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
20068 arity++;
20069
20070 if (arity == 1)
20071 fntype = build_function_type_list (type_out, type_in, NULL);
20072 else
20073 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
20074
20075 /* Build a function declaration for the vectorized function. */
20076 new_fndecl = build_decl (BUILTINS_LOCATION,
20077 FUNCTION_DECL, get_identifier (name), fntype);
20078 TREE_PUBLIC (new_fndecl) = 1;
20079 DECL_EXTERNAL (new_fndecl) = 1;
20080 DECL_IS_NOVOPS (new_fndecl) = 1;
20081 TREE_READONLY (new_fndecl) = 1;
20082
20083 return new_fndecl;
20084}
20085
20086/* Handler for an ACML-style interface to
20087 a library with vectorized intrinsics. */
20088
20089tree
20090ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
20091{
20092 char name[20] = "__vr.._";
20093 tree fntype, new_fndecl, args;
20094 unsigned arity;
20095 const char *bname;
20096 machine_mode el_mode, in_mode;
20097 int n, in_n;
20098
  /* ACML is 64-bit only and suitable for unsafe math only, as it does
     not correctly support parts of IEEE arithmetic, such as denormals,
     with the required precision.  */
20102 if (!TARGET_64BIT
20103 || !flag_unsafe_math_optimizations)
20104 return NULL_TREE;
20105
20106 el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
20110 if (el_mode != in_mode
20111 || n != in_n)
20112 return NULL_TREE;
20113
20114 switch (fn)
20115 {
20116 CASE_CFN_SIN:
20117 CASE_CFN_COS:
20118 CASE_CFN_EXP:
20119 CASE_CFN_LOG:
20120 CASE_CFN_LOG2:
20121 CASE_CFN_LOG10:
20122 if (el_mode == DFmode && n == 2)
20123 {
20124 name[4] = 'd';
20125 name[5] = '2';
20126 }
20127 else if (el_mode == SFmode && n == 4)
20128 {
20129 name[4] = 's';
20130 name[5] = '4';
20131 }
20132 else
20133 return NULL_TREE;
20134 break;
20135
20136 default:
20137 return NULL_TREE;
20138 }
20139
20140 tree fndecl = mathfn_built_in (el_mode == DFmode
20141 ? double_type_node : float_type_node, fn);
20142 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  sprintf (name + 7, "%s", bname+10);
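  /* For example, sin on a 2-element double vector should map to
     "__vrd2_sin" and cosf on a 4-element float vector to "__vrs4_cosf".  */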
20144
20145 arity = 0;
20146 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
20147 arity++;
20148
20149 if (arity == 1)
20150 fntype = build_function_type_list (type_out, type_in, NULL);
20151 else
20152 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
20153
20154 /* Build a function declaration for the vectorized function. */
20155 new_fndecl = build_decl (BUILTINS_LOCATION,
20156 FUNCTION_DECL, get_identifier (name), fntype);
20157 TREE_PUBLIC (new_fndecl) = 1;
20158 DECL_EXTERNAL (new_fndecl) = 1;
20159 DECL_IS_NOVOPS (new_fndecl) = 1;
20160 TREE_READONLY (new_fndecl) = 1;
20161
20162 return new_fndecl;
20163}
20164
20165/* Handler for an AOCL-LibM-style interface to
20166 a library with vectorized intrinsics. */
20167
20168tree
20169ix86_veclibabi_aocl (combined_fn fn, tree type_out, tree type_in)
20170{
20171 char name[20] = "amd_vr";
20172 int name_len = 6;
20173 tree fntype, new_fndecl, args;
20174 unsigned arity;
20175 const char *bname;
20176 machine_mode el_mode, in_mode;
20177 int n, in_n;
20178
  /* AOCL-LibM is 64-bit only.  It is also suitable for unsafe math only,
     as it trades off some accuracy for increased performance.  */
20181 if (!TARGET_64BIT
20182 || !flag_unsafe_math_optimizations)
20183 return NULL_TREE;
20184
20185 el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
20189 if (el_mode != in_mode
20190 || n != in_n)
20191 return NULL_TREE;
20192
20193 gcc_checking_assert (n > 0);
20194
20195 /* Decide whether there exists a function for the combination of FN, the mode
20196 and the vector width. Return early if it doesn't. */
20197
20198 if (el_mode != DFmode && el_mode != SFmode)
20199 return NULL_TREE;
20200
20201 /* Supported vector widths for given FN and single/double precision. Zeros
20202 are used to fill out unused positions in the arrays. */
20203 static const int supported_n[][2][3] = {
20204 /* Single prec. , Double prec. */
20205 { { 16, 0, 0 }, { 2, 4, 8 } }, /* TAN. */
20206 { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP. */
20207 { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP2. */
20208 { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG. */
20209 { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG2. */
20210 { { 4, 8, 16 }, { 2, 4, 8 } }, /* COS. */
20211 { { 4, 8, 16 }, { 2, 4, 8 } }, /* SIN. */
20212 { { 4, 8, 16 }, { 2, 4, 8 } }, /* POW. */
20213 { { 4, 8, 16 }, { 2, 4, 8 } }, /* ERF. */
20214 { { 4, 8, 16 }, { 2, 8, 0 } }, /* ATAN. */
20215 { { 4, 8, 16 }, { 2, 0, 0 } }, /* LOG10. */
20216 { { 4, 0, 0 }, { 2, 0, 0 } }, /* EXP10. */
20217 { { 4, 0, 0 }, { 2, 0, 0 } }, /* LOG1P. */
20218 { { 4, 8, 16 }, { 8, 0, 0 } }, /* ASIN. */
20219 { { 4, 16, 0 }, { 0, 0, 0 } }, /* ACOS. */
20220 { { 4, 8, 16 }, { 0, 0, 0 } }, /* TANH. */
20221 { { 4, 0, 0 }, { 0, 0, 0 } }, /* EXPM1. */
20222 { { 4, 8, 0 }, { 0, 0, 0 } }, /* COSH. */
20223 };
20224
20225 /* We cannot simply index the supported_n array with FN since multiple FNs
20226 may correspond to a single operation (see the definitions of these
20227 CASE_CFN_* macros). */
20228 int i;
20229 switch (fn)
20230 {
20231 CASE_CFN_TAN : i = 0; break;
20232 CASE_CFN_EXP : i = 1; break;
20233 CASE_CFN_EXP2 : i = 2; break;
20234 CASE_CFN_LOG : i = 3; break;
20235 CASE_CFN_LOG2 : i = 4; break;
20236 CASE_CFN_COS : i = 5; break;
20237 CASE_CFN_SIN : i = 6; break;
20238 CASE_CFN_POW : i = 7; break;
20239 CASE_CFN_ERF : i = 8; break;
20240 CASE_CFN_ATAN : i = 9; break;
20241 CASE_CFN_LOG10 : i = 10; break;
20242 CASE_CFN_EXP10 : i = 11; break;
20243 CASE_CFN_LOG1P : i = 12; break;
20244 CASE_CFN_ASIN : i = 13; break;
20245 CASE_CFN_ACOS : i = 14; break;
20246 CASE_CFN_TANH : i = 15; break;
20247 CASE_CFN_EXPM1 : i = 16; break;
20248 CASE_CFN_COSH : i = 17; break;
20249 default: return NULL_TREE;
20250 }
20251
20252 int j = el_mode == DFmode;
20253 bool n_is_supported = false;
20254 for (unsigned k = 0; k < 3; k++)
20255 if (supported_n[i][j][k] == n)
20256 {
20257 n_is_supported = true;
20258 break;
20259 }
20260 if (!n_is_supported)
20261 return NULL_TREE;
20262
20263 /* Append the precision and the vector width to the function name we are
20264 constructing. */
20265 name[name_len++] = el_mode == DFmode ? 'd' : 's';
20266 switch (n)
20267 {
20268 case 2:
20269 case 4:
20270 case 8:
20271 name[name_len++] = '0' + n;
20272 break;
20273 case 16:
20274 name[name_len++] = '1';
20275 name[name_len++] = '6';
20276 break;
20277 default:
20278 gcc_unreachable ();
20279 }
20280 name[name_len++] = '_';
20281
20282 /* Append the operation name (steal it from the name of a builtin). */
20283 tree fndecl = mathfn_built_in (el_mode == DFmode
20284 ? double_type_node : float_type_node, fn);
20285 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  sprintf (name + name_len, "%s", bname + 10);
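  /* For example, sin on a 2-element double vector should map to
     "amd_vrd2_sin" and expf on an 8-element float vector to
     "amd_vrs8_expf".  */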
20287
20288 arity = 0;
20289 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
20290 arity++;
20291
20292 if (arity == 1)
20293 fntype = build_function_type_list (type_out, type_in, NULL);
20294 else
20295 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
20296
20297 /* Build a function declaration for the vectorized function. */
20298 new_fndecl = build_decl (BUILTINS_LOCATION,
20299 FUNCTION_DECL, get_identifier (name), fntype);
20300 TREE_PUBLIC (new_fndecl) = 1;
20301 DECL_EXTERNAL (new_fndecl) = 1;
20302 TREE_READONLY (new_fndecl) = 1;
20303
20304 return new_fndecl;
20305}
20306
20307/* Returns a decl of a function that implements scatter store with
20308 register type VECTYPE and index type INDEX_TYPE and SCALE.
20309 Return NULL_TREE if it is not available. */
20310
20311static tree
20312ix86_vectorize_builtin_scatter (const_tree vectype,
20313 const_tree index_type, int scale)
20314{
20315 bool si;
20316 enum ix86_builtins code;
20317
20318 if (!TARGET_AVX512F)
20319 return NULL_TREE;
20320
20321 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
20322 ? !TARGET_USE_SCATTER_2PARTS
20323 : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
20324 ? !TARGET_USE_SCATTER_4PARTS
20325 : !TARGET_USE_SCATTER_8PARTS))
20326 return NULL_TREE;
20327
20328 if ((TREE_CODE (index_type) != INTEGER_TYPE
20329 && !POINTER_TYPE_P (index_type))
20330 || (TYPE_MODE (index_type) != SImode
20331 && TYPE_MODE (index_type) != DImode))
20332 return NULL_TREE;
20333
20334 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
20335 return NULL_TREE;
20336
20337 /* v*scatter* insn sign extends index to pointer mode. */
20338 if (TYPE_PRECISION (index_type) < POINTER_SIZE
20339 && TYPE_UNSIGNED (index_type))
20340 return NULL_TREE;
20341
20342 /* Scale can be 1, 2, 4 or 8. */
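  /* The power-of-two test below, (scale & (scale - 1)) == 0, combined
     with the range check accepts exactly 1, 2, 4 and 8.  */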
20343 if (scale <= 0
20344 || scale > 8
20345 || (scale & (scale - 1)) != 0)
20346 return NULL_TREE;
20347
20348 si = TYPE_MODE (index_type) == SImode;
20349 switch (TYPE_MODE (vectype))
20350 {
20351 case E_V8DFmode:
20352 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
20353 break;
20354 case E_V8DImode:
20355 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
20356 break;
20357 case E_V16SFmode:
20358 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
20359 break;
20360 case E_V16SImode:
20361 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
20362 break;
20363 case E_V4DFmode:
20364 if (TARGET_AVX512VL)
20365 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
20366 else
20367 return NULL_TREE;
20368 break;
20369 case E_V4DImode:
20370 if (TARGET_AVX512VL)
20371 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
20372 else
20373 return NULL_TREE;
20374 break;
20375 case E_V8SFmode:
20376 if (TARGET_AVX512VL)
20377 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
20378 else
20379 return NULL_TREE;
20380 break;
20381 case E_V8SImode:
20382 if (TARGET_AVX512VL)
20383 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
20384 else
20385 return NULL_TREE;
20386 break;
20387 case E_V2DFmode:
20388 if (TARGET_AVX512VL)
20389 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
20390 else
20391 return NULL_TREE;
20392 break;
20393 case E_V2DImode:
20394 if (TARGET_AVX512VL)
20395 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
20396 else
20397 return NULL_TREE;
20398 break;
20399 case E_V4SFmode:
20400 if (TARGET_AVX512VL)
20401 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
20402 else
20403 return NULL_TREE;
20404 break;
20405 case E_V4SImode:
20406 if (TARGET_AVX512VL)
20407 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
20408 else
20409 return NULL_TREE;
20410 break;
20411 default:
20412 return NULL_TREE;
20413 }
20414
  return get_ix86_builtin (code);
20416}
20417
20418/* Return true if it is safe to use the rsqrt optabs to optimize
20419 1.0/sqrt. */
20420
20421static bool
20422use_rsqrt_p (machine_mode mode)
20423{
20424 return ((mode == HFmode
20425 || (TARGET_SSE && TARGET_SSE_MATH))
20426 && flag_finite_math_only
20427 && !flag_trapping_math
20428 && flag_unsafe_math_optimizations);
20429}
20430
20431/* Helper for avx_vpermilps256_operand et al. This is also used by
20432 the expansion functions to turn the parallel back into a mask.
20433 The return value is 0 for no match and the imm8+1 for a match. */
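/* A worked example (for illustration only): in V4SFmode each element
   selector occupies two bits, so the identity parallel [0 1 2 3] encodes
   to imm8 0xe4 and this function returns 0xe4 + 1 == 0xe5.  */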
20434
20435int
20436avx_vpermilp_parallel (rtx par, machine_mode mode)
20437{
20438 unsigned i, nelt = GET_MODE_NUNITS (mode);
20439 unsigned mask = 0;
20440 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
20441
20442 if (XVECLEN (par, 0) != (int) nelt)
20443 return 0;
20444
20445 /* Validate that all of the elements are constants, and not totally
20446 out of range. Copy the data into an integral array to make the
20447 subsequent checks easier. */
20448 for (i = 0; i < nelt; ++i)
20449 {
20450 rtx er = XVECEXP (par, 0, i);
20451 unsigned HOST_WIDE_INT ei;
20452
20453 if (!CONST_INT_P (er))
20454 return 0;
20455 ei = INTVAL (er);
20456 if (ei >= nelt)
20457 return 0;
20458 ipar[i] = ei;
20459 }
20460
20461 switch (mode)
20462 {
20463 case E_V8DFmode:
20464 /* In the 512-bit DFmode case, we can only move elements within
20465 a 128-bit lane. First fill the second part of the mask,
20466 then fallthru. */
20467 for (i = 4; i < 6; ++i)
20468 {
20469 if (ipar[i] < 4 || ipar[i] >= 6)
20470 return 0;
20471 mask |= (ipar[i] - 4) << i;
20472 }
20473 for (i = 6; i < 8; ++i)
20474 {
20475 if (ipar[i] < 6)
20476 return 0;
20477 mask |= (ipar[i] - 6) << i;
20478 }
20479 /* FALLTHRU */
20480
20481 case E_V4DFmode:
20482 /* In the 256-bit DFmode case, we can only move elements within
20483 a 128-bit lane. */
20484 for (i = 0; i < 2; ++i)
20485 {
20486 if (ipar[i] >= 2)
20487 return 0;
20488 mask |= ipar[i] << i;
20489 }
20490 for (i = 2; i < 4; ++i)
20491 {
20492 if (ipar[i] < 2)
20493 return 0;
20494 mask |= (ipar[i] - 2) << i;
20495 }
20496 break;
20497
20498 case E_V16SFmode:
20499 /* In 512 bit SFmode case, permutation in the upper 256 bits
20500 must mirror the permutation in the lower 256-bits. */
20501 for (i = 0; i < 8; ++i)
20502 if (ipar[i] + 8 != ipar[i + 8])
20503 return 0;
20504 /* FALLTHRU */
20505
20506 case E_V8SFmode:
20507 /* In 256 bit SFmode case, we have full freedom of
20508 movement within the low 128-bit lane, but the high 128-bit
20509 lane must mirror the exact same pattern. */
20510 for (i = 0; i < 4; ++i)
20511 if (ipar[i] + 4 != ipar[i + 4])
20512 return 0;
20513 nelt = 4;
20514 /* FALLTHRU */
20515
20516 case E_V2DFmode:
20517 case E_V4SFmode:
20518 /* In the 128-bit case, we've full freedom in the placement of
20519 the elements from the source operand. */
20520 for (i = 0; i < nelt; ++i)
20521 mask |= ipar[i] << (i * (nelt / 2));
20522 break;
20523
20524 default:
20525 gcc_unreachable ();
20526 }
20527
20528 /* Make sure success has a non-zero value by adding one. */
20529 return mask + 1;
20530}
20531
20532/* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
20533 the expansion functions to turn the parallel back into a mask.
20534 The return value is 0 for no match and the imm8+1 for a match. */
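/* For illustration: in V8SFmode the parallel [4 5 6 7 0 1 2 3] swaps the
   two 128-bit halves; the reconstructed imm8 is 0x01 and this function
   returns 0x02.  */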
20535
20536int
20537avx_vperm2f128_parallel (rtx par, machine_mode mode)
20538{
20539 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
20540 unsigned mask = 0;
20541 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
20542
20543 if (XVECLEN (par, 0) != (int) nelt)
20544 return 0;
20545
20546 /* Validate that all of the elements are constants, and not totally
20547 out of range. Copy the data into an integral array to make the
20548 subsequent checks easier. */
20549 for (i = 0; i < nelt; ++i)
20550 {
20551 rtx er = XVECEXP (par, 0, i);
20552 unsigned HOST_WIDE_INT ei;
20553
20554 if (!CONST_INT_P (er))
20555 return 0;
20556 ei = INTVAL (er);
20557 if (ei >= 2 * nelt)
20558 return 0;
20559 ipar[i] = ei;
20560 }
20561
20562 /* Validate that the halves of the permute are halves. */
20563 for (i = 0; i < nelt2 - 1; ++i)
20564 if (ipar[i] + 1 != ipar[i + 1])
20565 return 0;
20566 for (i = nelt2; i < nelt - 1; ++i)
20567 if (ipar[i] + 1 != ipar[i + 1])
20568 return 0;
20569
20570 /* Reconstruct the mask. */
20571 for (i = 0; i < 2; ++i)
20572 {
20573 unsigned e = ipar[i * nelt2];
20574 if (e % nelt2)
20575 return 0;
20576 e /= nelt2;
20577 mask |= e << (i * 4);
20578 }
20579
20580 /* Make sure success has a non-zero value by adding one. */
20581 return mask + 1;
20582}
20583
20584/* Return a mask of VPTERNLOG operands that do not affect output. */
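/* Worked example (illustrative): imm8 0xf0 simply copies operand 1, so
   operands 2 and 3 cannot affect the result and the checks below yield
   a mask of 2 | 4 == 6.  */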
20585
20586int
20587vpternlog_redundant_operand_mask (rtx pternlog_imm)
20588{
20589 int mask = 0;
20590 int imm8 = INTVAL (pternlog_imm);
20591
20592 if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
20593 mask |= 1;
20594 if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
20595 mask |= 2;
20596 if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
20597 mask |= 4;
20598
20599 return mask;
20600}
20601
20602/* Eliminate false dependencies on operands that do not affect output
20603 by substituting other operands of a VPTERNLOG. */
20604
20605void
20606substitute_vpternlog_operands (rtx *operands)
20607{
  int mask = vpternlog_redundant_operand_mask (operands[4]);
20609
20610 if (mask & 1) /* The first operand is redundant. */
20611 operands[1] = operands[2];
20612
20613 if (mask & 2) /* The second operand is redundant. */
20614 operands[2] = operands[1];
20615
20616 if (mask & 4) /* The third operand is redundant. */
20617 operands[3] = operands[1];
20618 else if (REG_P (operands[3]))
20619 {
20620 if (mask & 1)
20621 operands[1] = operands[3];
20622 if (mask & 2)
20623 operands[2] = operands[3];
20624 }
20625}
20626
20627/* Return a register priority for hard reg REGNO. */
20628static int
20629ix86_register_priority (int hard_regno)
20630{
  /* ebp and r13 as the base always want a displacement, and r12 as the
     base always wants an index.  So discourage their use in an
     address.  */
20634 if (hard_regno == R12_REG || hard_regno == R13_REG)
20635 return 0;
20636 if (hard_regno == BP_REG)
20637 return 1;
20638 /* New x86-64 int registers result in bigger code size. Discourage them. */
20639 if (REX_INT_REGNO_P (hard_regno))
20640 return 2;
20641 if (REX2_INT_REGNO_P (hard_regno))
20642 return 2;
20643 /* New x86-64 SSE registers result in bigger code size. Discourage them. */
20644 if (REX_SSE_REGNO_P (hard_regno))
20645 return 2;
20646 if (EXT_REX_SSE_REGNO_P (hard_regno))
20647 return 1;
20648 /* Usage of AX register results in smaller code. Prefer it. */
20649 if (hard_regno == AX_REG)
20650 return 4;
20651 return 3;
20652}
20653
20654/* Implement TARGET_PREFERRED_RELOAD_CLASS.
20655
20656 Put float CONST_DOUBLE in the constant pool instead of fp regs.
20657 QImode must go into class Q_REGS.
20658 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
20659 movdf to do mem-to-mem moves through integer regs. */
20660
20661static reg_class_t
20662ix86_preferred_reload_class (rtx x, reg_class_t regclass)
20663{
20664 machine_mode mode = GET_MODE (x);
20665
20666 /* We're only allowed to return a subclass of CLASS. Many of the
20667 following checks fail for NO_REGS, so eliminate that early. */
20668 if (regclass == NO_REGS)
20669 return NO_REGS;
20670
20671 /* All classes can load zeros. */
20672 if (x == CONST0_RTX (mode))
20673 return regclass;
20674
20675 /* Force constants into memory if we are loading a (nonzero) constant into
20676 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
20677 instructions to load from a constant. */
20678 if (CONSTANT_P (x)
20679 && (MAYBE_MMX_CLASS_P (regclass)
20680 || MAYBE_SSE_CLASS_P (regclass)
20681 || MAYBE_MASK_CLASS_P (regclass)))
20682 return NO_REGS;
20683
20684 /* Floating-point constants need more complex checks. */
20685 if (CONST_DOUBLE_P (x))
20686 {
20687 /* General regs can load everything. */
20688 if (INTEGER_CLASS_P (regclass))
20689 return regclass;
20690
20691 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20692 zero above. We only want to wind up preferring 80387 registers if
20693 we plan on doing computation with them. */
20694 if (IS_STACK_MODE (mode)
20695 && standard_80387_constant_p (x) > 0)
20696 {
20697 /* Limit class to FP regs. */
20698 if (FLOAT_CLASS_P (regclass))
20699 return FLOAT_REGS;
20700 }
20701
20702 return NO_REGS;
20703 }
20704
20705 /* Prefer SSE if we can use them for math. Also allow integer regs
20706 when moves between register units are cheap. */
20707 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20708 {
20709 if (TARGET_INTER_UNIT_MOVES_FROM_VEC
20710 && TARGET_INTER_UNIT_MOVES_TO_VEC
20711 && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
20712 return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20713 else
20714 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20715 }
20716
20717 /* Generally when we see PLUS here, it's the function invariant
20718 (plus soft-fp const_int). Which can only be computed into general
20719 regs. */
20720 if (GET_CODE (x) == PLUS)
20721 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
20722
20723 /* QImode constants are easy to load, but non-constant QImode data
20724 must go into Q_REGS or ALL_MASK_REGS. */
20725 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20726 {
20727 if (Q_CLASS_P (regclass))
20728 return regclass;
20729 else if (reg_class_subset_p (Q_REGS, regclass))
20730 return Q_REGS;
20731 else if (MASK_CLASS_P (regclass))
20732 return regclass;
20733 else
20734 return NO_REGS;
20735 }
20736
20737 return regclass;
20738}
20739
20740/* Discourage putting floating-point values in SSE registers unless
20741 SSE math is being used, and likewise for the 387 registers. */
20742static reg_class_t
20743ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
20744{
20745 /* Restrict the output reload class to the register bank that we are doing
20746 math on. If we would like not to return a subset of CLASS, reject this
20747 alternative: if reload cannot do this, it will still use its choice. */
20748 machine_mode mode = GET_MODE (x);
20749 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20750 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
20751
20752 if (IS_STACK_MODE (mode))
20753 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20754
20755 return regclass;
20756}
20757
20758static reg_class_t
20759ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
20760 machine_mode mode, secondary_reload_info *sri)
20761{
20762 /* Double-word spills from general registers to non-offsettable memory
20763 references (zero-extended addresses) require special handling. */
20764 if (TARGET_64BIT
20765 && MEM_P (x)
20766 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
20767 && INTEGER_CLASS_P (rclass)
20768 && !offsettable_memref_p (x))
20769 {
20770 sri->icode = (in_p
20771 ? CODE_FOR_reload_noff_load
20772 : CODE_FOR_reload_noff_store);
20773 /* Add the cost of moving address to a temporary. */
20774 sri->extra_cost = 1;
20775
20776 return NO_REGS;
20777 }
20778
20779 /* QImode spills from non-QI registers require
20780 intermediate register on 32bit targets. */
20781 if (mode == QImode
20782 && ((!TARGET_64BIT && !in_p
20783 && INTEGER_CLASS_P (rclass)
20784 && MAYBE_NON_Q_CLASS_P (rclass))
20785 || (!TARGET_AVX512DQ
20786 && MAYBE_MASK_CLASS_P (rclass))))
20787 {
20788 int regno = true_regnum (x);
20789
20790 /* Return Q_REGS if the operand is in memory. */
20791 if (regno == -1)
20792 return Q_REGS;
20793
20794 return NO_REGS;
20795 }
20796
20797 /* Require movement to gpr, and then store to memory. */
20798 if ((mode == HFmode || mode == HImode || mode == V2QImode
20799 || mode == BFmode)
20800 && !TARGET_SSE4_1
20801 && SSE_CLASS_P (rclass)
20802 && !in_p && MEM_P (x))
20803 {
20804 sri->extra_cost = 1;
20805 return GENERAL_REGS;
20806 }
20807
20808 /* This condition handles corner case where an expression involving
20809 pointers gets vectorized. We're trying to use the address of a
20810 stack slot as a vector initializer.
20811
20812 (set (reg:V2DI 74 [ vect_cst_.2 ])
20813 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
20814
20815 Eventually frame gets turned into sp+offset like this:
20816
20817 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20818 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
20819 (const_int 392 [0x188]))))
20820
20821 That later gets turned into:
20822
20823 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20824 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
20825 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
20826
20827 We'll have the following reload recorded:
20828
20829 Reload 0: reload_in (DI) =
20830 (plus:DI (reg/f:DI 7 sp)
20831 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
20832 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20833 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
20834 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
20835 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20836 reload_reg_rtx: (reg:V2DI 22 xmm1)
20837
20838 Which isn't going to work since SSE instructions can't handle scalar
20839 additions. Returning GENERAL_REGS forces the addition into integer
20840 register and reload can handle subsequent reloads without problems. */
20841
20842 if (in_p && GET_CODE (x) == PLUS
20843 && SSE_CLASS_P (rclass)
20844 && SCALAR_INT_MODE_P (mode))
20845 return GENERAL_REGS;
20846
20847 return NO_REGS;
20848}
20849
20850/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
20851
20852static bool
20853ix86_class_likely_spilled_p (reg_class_t rclass)
20854{
20855 switch (rclass)
20856 {
20857 case AREG:
20858 case DREG:
20859 case CREG:
20860 case BREG:
20861 case AD_REGS:
20862 case SIREG:
20863 case DIREG:
20864 case SSE_FIRST_REG:
20865 case FP_TOP_REG:
20866 case FP_SECOND_REG:
20867 return true;
20868
20869 default:
20870 break;
20871 }
20872
20873 return false;
20874}
20875
20876/* Implement TARGET_CALLEE_SAVE_COST. */
20877
20878static int
20879ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode,
20880 unsigned int, int mem_cost, const HARD_REG_SET &, bool)
20881{
20882 /* Account for the fact that push and pop are shorter and do their
20883 own allocation and deallocation. */
20884 if (GENERAL_REGNO_P (hard_regno))
20885 {
20886 /* push is 1 byte while typical spill is 4-5 bytes.
20887 ??? We probably should adjust size costs accordingly.
20888 Costs are relative to reg-reg move that has 2 bytes for 32bit
20889 and 3 bytes otherwise. Be sure that no cost table sets cost
20890 to 2, so we end up with 0. */
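      /* E.g. if MEM_COST is 6 and we optimize for speed, a call-saved
	 GPR is costed at 6 - 2 == 4.  */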
20891 if (mem_cost <= 2 || optimize_function_for_size_p (cfun))
20892 return 1;
20893 return mem_cost - 2;
20894 }
20895 return mem_cost;
20896}
20897
20898/* Return true if a set of DST by the expression SRC should be allowed.
20899 This prevents complex sets of likely_spilled hard regs before split1. */
20900
20901bool
20902ix86_hardreg_mov_ok (rtx dst, rtx src)
20903{
20904 /* Avoid complex sets of likely_spilled hard registers before reload. */
20905 if (REG_P (dst) && HARD_REGISTER_P (dst)
20906 && !REG_P (src) && !MEM_P (src)
20907 && !(VECTOR_MODE_P (GET_MODE (dst))
	   ? standard_sse_constant_p (src, GET_MODE (dst))
20909 : x86_64_immediate_operand (src, GET_MODE (dst)))
20910 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
20911 && ix86_pre_reload_split ())
20912 return false;
20913 return true;
20914}
20915
20916/* If we are copying between registers from different register sets
20917 (e.g. FP and integer), we may need a memory location.
20918
20919 The function can't work reliably when one of the CLASSES is a class
20920 containing registers from multiple sets. We avoid this by never combining
20921 different sets in a single alternative in the machine description.
20922 Ensure that this constraint holds to avoid unexpected surprises.
20923
20924 When STRICT is false, we are being called from REGISTER_MOVE_COST,
20925 so do not enforce these sanity checks.
20926
20927 To optimize register_move_cost performance, define inline variant. */
20928
20929static inline bool
20930inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
20931 reg_class_t class2, int strict)
20932{
20933 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
20934 return false;
20935
20936 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20937 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20938 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20939 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20940 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20941 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
20942 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
20943 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
20944 {
20945 gcc_assert (!strict || lra_in_progress);
20946 return true;
20947 }
20948
20949 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20950 return true;
20951
20952 /* ??? This is a lie. We do have moves between mmx/general, and for
20953 mmx/sse2. But by saying we need secondary memory we discourage the
20954 register allocator from using the mmx registers unless needed. */
20955 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20956 return true;
20957
20958 /* Between mask and general, we have moves no larger than word size. */
20959 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
20960 {
20961 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
20962 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20963 return true;
20964 }
20965
20966 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20967 {
20968 /* SSE1 doesn't have any direct moves from other classes. */
20969 if (!TARGET_SSE2)
20970 return true;
20971
20972 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
20973 return true;
20974
20975 /* If the target says that inter-unit moves are more expensive
20976 than moving through memory, then don't generate them. */
20977 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
20978 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
20979 return true;
20980
20981 /* With SSE4.1, *mov{ti,di}_internal supports moves between
20982 SSE_REGS and GENERAL_REGS using pinsr{q,d} or pextr{q,d}. */
20983 if (TARGET_SSE4_1
20984 && (TARGET_64BIT ? mode == TImode : mode == DImode))
20985 return false;
20986
20987 int msize = GET_MODE_SIZE (mode);
20988
20989 /* Between SSE and general, we have moves no larger than word size. */
20990 if (msize > UNITS_PER_WORD)
20991 return true;
20992
      /* In addition to SImode moves, HImode moves are supported for SSE2
	 and above; use vmovw with AVX512FP16, or pinsrw/pextrw without it.  */
20995 int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
20996
20997 if (msize < minsize)
20998 return true;
20999 }
21000
21001 return false;
21002}
21003
21004/* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
21005
21006static bool
21007ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
21008 reg_class_t class2)
21009{
  return inline_secondary_memory_needed (mode, class1, class2, true);
21011}
21012
21013/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
21014
21015 get_secondary_mem widens integral modes to BITS_PER_WORD.
21016 There is no need to emit full 64 bit move on 64 bit targets
21017 for integral modes that can be moved using 32 bit move. */
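/* E.g. a QImode or HImode value that must go through memory is widened
   to the 32-bit integer mode by the function below, so a single 32-bit
   load/store pair suffices.  */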
21018
21019static machine_mode
21020ix86_secondary_memory_needed_mode (machine_mode mode)
21021{
21022 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
21023 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
21024 return mode;
21025}
21026
21027/* Implement the TARGET_CLASS_MAX_NREGS hook.
21028
21029 On the 80386, this is the size of MODE in words,
21030 except in the FP regs, where a single reg is always enough. */
21031
21032static unsigned char
21033ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
21034{
21035 if (MAYBE_INTEGER_CLASS_P (rclass))
21036 {
21037 if (mode == XFmode)
21038 return (TARGET_64BIT ? 2 : 3);
21039 else if (mode == XCmode)
21040 return (TARGET_64BIT ? 4 : 6);
21041 else
21042 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
21043 }
21044 else
21045 {
21046 if (COMPLEX_MODE_P (mode))
21047 return 2;
21048 else
21049 return 1;
21050 }
21051}
21052
21053/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
21054
21055static bool
21056ix86_can_change_mode_class (machine_mode from, machine_mode to,
21057 reg_class_t regclass)
21058{
21059 if (from == to)
21060 return true;
21061
21062 /* x87 registers can't do subreg at all, as all values are reformatted
21063 to extended precision.
21064
     ??? The middle-end queries mode changes for ALL_REGS, and this makes
     vec_series_lowpart_p always return false.  We probably should
     restrict this to modes supported by i387 and check if it is enabled.  */
21068 if (MAYBE_FLOAT_CLASS_P (regclass))
21069 return false;
21070
21071 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
21072 {
21073 /* Vector registers do not support QI or HImode loads. If we don't
21074 disallow a change to these modes, reload will assume it's ok to
21075 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
21076 the vec_dupv4hi pattern.
21077 NB: SSE2 can load 16bit data to sse register via pinsrw. */
21078 int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
21079 if (GET_MODE_SIZE (from) < mov_size
21080 || GET_MODE_SIZE (to) < mov_size)
21081 return false;
21082 }
21083
21084 return true;
21085}
21086
21087/* Return index of MODE in the sse load/store tables. */
21088
21089static inline int
21090sse_store_index (machine_mode mode)
21091{
21092 /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
21093 costs to processor_costs, which requires changes to all entries in
21094 processor cost table. */
21095 if (mode == E_HFmode)
21096 mode = E_SFmode;
21097
21098 switch (GET_MODE_SIZE (mode))
21099 {
21100 case 4:
21101 return 0;
21102 case 8:
21103 return 1;
21104 case 16:
21105 return 2;
21106 case 32:
21107 return 3;
21108 case 64:
21109 return 4;
21110 default:
21111 return -1;
21112 }
21113}
21114
21115/* Return the cost of moving data of mode M between a
21116 register and memory. A value of 2 is the default; this cost is
21117 relative to those in `REGISTER_MOVE_COST'.
21118
21119 This function is used extensively by register_move_cost that is used to
21120 build tables at startup. Make it inline in this case.
21121 When IN is 2, return maximum of in and out move cost.
21122
21123 If moving between registers and memory is more expensive than
21124 between two registers, you should define this macro to express the
21125 relative cost.
21126
21127 Model also increased moving costs of QImode registers in non
21128 Q_REGS classes.
21129 */
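/* For instance, with IN == 2 the SFmode cost for FLOAT_REGS below is
   MAX (fp_load[0], fp_store[0]).  */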
21130static inline int
21131inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
21132{
21133 int cost;
21134
21135 if (FLOAT_CLASS_P (regclass))
21136 {
21137 int index;
21138 switch (mode)
21139 {
21140 case E_SFmode:
21141 index = 0;
21142 break;
21143 case E_DFmode:
21144 index = 1;
21145 break;
21146 case E_XFmode:
21147 index = 2;
21148 break;
21149 default:
21150 return 100;
21151 }
21152 if (in == 2)
21153 return MAX (ix86_cost->hard_register.fp_load [index],
21154 ix86_cost->hard_register.fp_store [index]);
21155 return in ? ix86_cost->hard_register.fp_load [index]
21156 : ix86_cost->hard_register.fp_store [index];
21157 }
21158 if (SSE_CLASS_P (regclass))
21159 {
21160 int index = sse_store_index (mode);
21161 if (index == -1)
21162 return 100;
21163 if (in == 2)
21164 return MAX (ix86_cost->hard_register.sse_load [index],
21165 ix86_cost->hard_register.sse_store [index]);
21166 return in ? ix86_cost->hard_register.sse_load [index]
21167 : ix86_cost->hard_register.sse_store [index];
21168 }
21169 if (MASK_CLASS_P (regclass))
21170 {
21171 int index;
21172 switch (GET_MODE_SIZE (mode))
21173 {
21174 case 1:
21175 index = 0;
21176 break;
21177 case 2:
21178 index = 1;
21179 break;
21180 /* DImode loads and stores assumed to cost the same as SImode. */
21181 case 4:
21182 case 8:
21183 index = 2;
21184 break;
21185 default:
21186 return 100;
21187 }
21188
21189 if (in == 2)
21190 return MAX (ix86_cost->hard_register.mask_load[index],
21191 ix86_cost->hard_register.mask_store[index]);
      return in ? ix86_cost->hard_register.mask_load[index]
		: ix86_cost->hard_register.mask_store[index];
21194 }
21195 if (MMX_CLASS_P (regclass))
21196 {
21197 int index;
21198 switch (GET_MODE_SIZE (mode))
21199 {
21200 case 4:
21201 index = 0;
21202 break;
21203 case 8:
21204 index = 1;
21205 break;
21206 default:
21207 return 100;
21208 }
21209 if (in == 2)
21210 return MAX (ix86_cost->hard_register.mmx_load [index],
21211 ix86_cost->hard_register.mmx_store [index]);
21212 return in ? ix86_cost->hard_register.mmx_load [index]
21213 : ix86_cost->hard_register.mmx_store [index];
21214 }
21215 switch (GET_MODE_SIZE (mode))
21216 {
21217 case 1:
21218 if (Q_CLASS_P (regclass) || TARGET_64BIT)
21219 {
21220 if (!in)
21221 return ix86_cost->hard_register.int_store[0];
21222 if (TARGET_PARTIAL_REG_DEPENDENCY
21223 && optimize_function_for_speed_p (cfun))
21224 cost = ix86_cost->hard_register.movzbl_load;
21225 else
21226 cost = ix86_cost->hard_register.int_load[0];
21227 if (in == 2)
21228 return MAX (cost, ix86_cost->hard_register.int_store[0]);
21229 return cost;
21230 }
21231 else
21232 {
21233 if (in == 2)
21234 return MAX (ix86_cost->hard_register.movzbl_load,
21235 ix86_cost->hard_register.int_store[0] + 4);
21236 if (in)
21237 return ix86_cost->hard_register.movzbl_load;
21238 else
21239 return ix86_cost->hard_register.int_store[0] + 4;
21240 }
21241 break;
21242 case 2:
21243 {
21244 int cost;
21245 if (in == 2)
21246 cost = MAX (ix86_cost->hard_register.int_load[1],
21247 ix86_cost->hard_register.int_store[1]);
21248 else
21249 cost = in ? ix86_cost->hard_register.int_load[1]
21250 : ix86_cost->hard_register.int_store[1];
21251
21252 if (mode == E_HFmode)
21253 {
21254 /* Prefer SSE over GPR for HFmode. */
21255 int sse_cost;
21256 int index = sse_store_index (mode);
21257 if (in == 2)
21258 sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
21259 ix86_cost->hard_register.sse_store[index]);
21260 else
21261 sse_cost = (in
21262 ? ix86_cost->hard_register.sse_load [index]
21263 : ix86_cost->hard_register.sse_store [index]);
21264 if (sse_cost >= cost)
21265 cost = sse_cost + 1;
21266 }
21267 return cost;
21268 }
21269 default:
21270 if (in == 2)
21271 cost = MAX (ix86_cost->hard_register.int_load[2],
21272 ix86_cost->hard_register.int_store[2]);
21273 else if (in)
21274 cost = ix86_cost->hard_register.int_load[2];
21275 else
21276 cost = ix86_cost->hard_register.int_store[2];
      /* Multiply by the number of GPR moves needed.  */
21278 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
21279 }
21280}
21281
21282static int
21283ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
21284{
  return inline_memory_move_cost (mode, (enum reg_class) regclass,
				  in ? 1 : 0);
21286}
21287
21288
21289/* Return the cost of moving data from a register in class CLASS1 to
21290 one in class CLASS2.
21291
21292 It is not required that the cost always equal 2 when FROM is the same as TO;
21293 on some machines it is expensive to move between registers if they are not
21294 general registers. */
21295
21296static int
21297ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
21298 reg_class_t class2_i)
21299{
21300 enum reg_class class1 = (enum reg_class) class1_i;
21301 enum reg_class class2 = (enum reg_class) class2_i;
21302
21303 /* In case we require secondary memory, compute cost of the store followed
21304 by load. In order to avoid bad register allocation choices, we need
21305 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
21306
  if (inline_secondary_memory_needed (mode, class1, class2, false))
21308 {
21309 int cost = 1;
21310
      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);
21313
21314 /* In case of copying from general_purpose_register we may emit multiple
21315 stores followed by single load causing memory size mismatch stall.
21316 Count this as arbitrarily high cost of 20. */
21317 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
21318 && TARGET_MEMORY_MISMATCH_STALL
21319 && targetm.class_max_nregs (class1, mode)
21320 > targetm.class_max_nregs (class2, mode))
21321 cost += 20;
21322
21323 /* In the case of FP/MMX moves, the registers actually overlap, and we
21324 have to switch modes in order to treat them differently. */
21325 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
21326 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
21327 cost += 20;
21328
21329 return cost;
21330 }
21331
21332 /* Moves between MMX and non-MMX units require secondary memory. */
21333 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21334 gcc_unreachable ();
21335
21336 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21337 return (SSE_CLASS_P (class1)
21338 ? ix86_cost->hard_register.sse_to_integer
21339 : ix86_cost->hard_register.integer_to_sse);
21340
21341 /* Moves between mask register and GPR. */
21342 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
21343 {
21344 return (MASK_CLASS_P (class1)
21345 ? ix86_cost->hard_register.mask_to_integer
21346 : ix86_cost->hard_register.integer_to_mask);
21347 }
21348 /* Moving between mask registers. */
21349 if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
21350 return ix86_cost->hard_register.mask_move;
21351
21352 if (MAYBE_FLOAT_CLASS_P (class1))
21353 return ix86_cost->hard_register.fp_move;
21354 if (MAYBE_SSE_CLASS_P (class1))
21355 {
21356 if (GET_MODE_BITSIZE (mode) <= 128)
21357 return ix86_cost->hard_register.xmm_move;
21358 if (GET_MODE_BITSIZE (mode) <= 256)
21359 return ix86_cost->hard_register.ymm_move;
21360 return ix86_cost->hard_register.zmm_move;
21361 }
21362 if (MAYBE_MMX_CLASS_P (class1))
21363 return ix86_cost->hard_register.mmx_move;
21364 return 2;
21365}
21366
21367/* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
21368 words of a value of mode MODE but can be less for certain modes in
21369 special long registers.
21370
21371 Actually there are no two word move instructions for consecutive
21372 registers. And only registers 0-3 may have mov byte instructions
21373 applied to them. */
21374
21375static unsigned int
21376ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
21377{
21378 if (GENERAL_REGNO_P (regno))
21379 {
21380 if (mode == XFmode)
21381 return TARGET_64BIT ? 2 : 3;
21382 if (mode == XCmode)
21383 return TARGET_64BIT ? 4 : 6;
21384 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
21385 }
21386 if (COMPLEX_MODE_P (mode))
21387 return 2;
21388 /* Register pair for mask registers. */
21389 if (mode == P2QImode || mode == P2HImode)
21390 return 2;
21391 if (mode == V64SFmode || mode == V64SImode)
21392 return 4;
21393 return 1;
21394}
21395
21396/* Implement REGMODE_NATURAL_SIZE(MODE). */
21397unsigned int
21398ix86_regmode_natural_size (machine_mode mode)
21399{
21400 if (mode == P2HImode || mode == P2QImode)
21401 return GET_MODE_SIZE (mode) / 2;
21402 return UNITS_PER_WORD;
21403}
21404
21405/* Implement TARGET_HARD_REGNO_MODE_OK. */
21406
21407static bool
21408ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
21409{
  /* The flags register, and only the flags register, can hold
     CCmode values.  */
21411 if (CC_REGNO_P (regno))
21412 return GET_MODE_CLASS (mode) == MODE_CC;
21413 if (GET_MODE_CLASS (mode) == MODE_CC
21414 || GET_MODE_CLASS (mode) == MODE_RANDOM)
21415 return false;
21416 if (STACK_REGNO_P (regno))
21417 return VALID_FP_MODE_P (mode);
21418 if (MASK_REGNO_P (regno))
21419 {
21420 /* Register pair only starts at even register number. */
21421 if ((mode == P2QImode || mode == P2HImode))
21422 return MASK_PAIR_REGNO_P(regno);
21423
21424 return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
21425 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
21426 }
21427
21428 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
21429 return false;
21430
21431 if (SSE_REGNO_P (regno))
21432 {
21433 /* We implement the move patterns for all vector modes into and
21434 out of SSE registers, even when no operation instructions
21435 are available. */
21436
21437 /* For AVX-512 we allow, regardless of regno:
21438 - XI mode
21439 - any of 512-bit wide vector mode
21440 - any scalar mode. */
21441 if (TARGET_AVX512F
21442 && ((VALID_AVX512F_REG_OR_XI_MODE (mode))
21443 || VALID_AVX512F_SCALAR_MODE (mode)))
21444 return true;
21445
21446 /* TODO check for QI/HI scalars. */
21447 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
21448 if (TARGET_AVX512VL
21449 && (VALID_AVX256_REG_OR_OI_MODE (mode)
21450 || VALID_AVX512VL_128_REG_MODE (mode)))
21451 return true;
21452
21453 /* xmm16-xmm31 are only available for AVX-512. */
21454 if (EXT_REX_SSE_REGNO_P (regno))
21455 return false;
21456
21457 /* OImode and AVX modes are available only when AVX is enabled. */
21458 return ((TARGET_AVX
21459 && VALID_AVX256_REG_OR_OI_MODE (mode))
21460 || VALID_SSE_REG_MODE (mode)
21461 || VALID_SSE2_REG_MODE (mode)
21462 || VALID_MMX_REG_MODE (mode)
21463 || VALID_MMX_REG_MODE_3DNOW (mode));
21464 }
21465 if (MMX_REGNO_P (regno))
21466 {
21467 /* We implement the move patterns for 3DNOW modes even in MMX mode,
21468 so if the register is available at all, then we can move data of
21469 the given mode into or out of it. */
21470 return (VALID_MMX_REG_MODE (mode)
21471 || VALID_MMX_REG_MODE_3DNOW (mode));
21472 }
21473
21474 if (mode == QImode)
21475 {
21476 /* Take care for QImode values - they can be in non-QI regs,
21477 but then they do cause partial register stalls. */
21478 if (ANY_QI_REGNO_P (regno))
21479 return true;
21480 if (!TARGET_PARTIAL_REG_STALL)
21481 return true;
21482 /* LRA checks if the hard register is OK for the given mode.
21483 QImode values can live in non-QI regs, so we allow all
21484 registers here. */
21485 if (lra_in_progress)
21486 return true;
21487 return !can_create_pseudo_p ();
21488 }
21489 /* We handle both integer and floats in the general purpose registers. */
21490 else if (VALID_INT_MODE_P (mode)
21491 || VALID_FP_MODE_P (mode))
21492 return true;
21493 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
21494 on to use that value in smaller contexts, this can easily force a
21495 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
21496 supporting DImode, allow it. */
21497 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
21498 return true;
21499
21500 return false;
21501}
21502
21503/* Implement TARGET_INSN_CALLEE_ABI. */
21504
21505const predefined_function_abi &
21506ix86_insn_callee_abi (const rtx_insn *insn)
21507{
21508 unsigned int abi_id = 0;
21509 rtx pat = PATTERN (insn);
21510 if (vzeroupper_pattern (pat, VOIDmode))
21511 abi_id = ABI_VZEROUPPER;
21512
21513 return function_abis[abi_id];
21514}
21515
21516/* Initialize function_abis with corresponding abi_id,
21517 currently only handle vzeroupper. */
21518void
21519ix86_initialize_callee_abi (unsigned int abi_id)
21520{
21521 gcc_assert (abi_id == ABI_VZEROUPPER);
21522 predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
21523 if (!vzeroupper_abi.initialized_p ())
21524 {
21525 HARD_REG_SET full_reg_clobbers;
      CLEAR_HARD_REG_SET (full_reg_clobbers);
21527 vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
21528 }
21529}
21530
21531void
21532ix86_expand_avx_vzeroupper (void)
21533{
21534 /* Initialize vzeroupper_abi here. */
21535 ix86_initialize_callee_abi (ABI_VZEROUPPER);
21536 rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
21537 /* Return false for non-local goto in can_nonlocal_goto. */
  make_reg_eh_region_note (insn, 0, INT_MIN);
21539 /* Flag used for call_insn indicates it's a fake call. */
21540 RTX_FLAG (insn, used) = 1;
21541}
21542
21543
21544/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
21545 saves SSE registers across calls is Win64 (thus no need to check the
21546 current ABI here), and with AVX enabled Win64 only guarantees that
21547 the low 16 bytes are saved. */
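/* For instance (illustrative), under ABI_VZEROUPPER a 32-byte V8SFmode
   value in xmm0-xmm15 is considered partially clobbered (its upper 128
   bits), while a 16-byte V4SFmode value is preserved.  */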
21548
21549static bool
21550ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
21551 machine_mode mode)
21552{
21553 /* Special ABI for vzeroupper which only clobber higher part of sse regs. */
21554 if (abi_id == ABI_VZEROUPPER)
21555 return (GET_MODE_SIZE (mode) > 16
21556 && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
21557 || LEGACY_SSE_REGNO_P (regno)));
21558
21559 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
21560}
21561
21562/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
21563 tieable integer mode. */
21564
21565static bool
21566ix86_tieable_integer_mode_p (machine_mode mode)
21567{
21568 switch (mode)
21569 {
21570 case E_HImode:
21571 case E_SImode:
21572 return true;
21573
21574 case E_QImode:
21575 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
21576
21577 case E_DImode:
21578 return TARGET_64BIT;
21579
21580 default:
21581 return false;
21582 }
21583}
21584
21585/* Implement TARGET_MODES_TIEABLE_P.
21586
21587 Return true if MODE1 is accessible in a register that can hold MODE2
21588 without copying. That is, all register classes that can hold MODE2
21589 can also hold MODE1. */
21590
21591static bool
21592ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
21593{
21594 if (mode1 == mode2)
21595 return true;
21596
  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
21599 return true;
21600
21601 /* MODE2 being XFmode implies fp stack or general regs, which means we
21602 can tie any smaller floating point modes to it. Note that we do not
21603 tie this with TFmode. */
21604 if (mode2 == XFmode)
21605 return mode1 == SFmode || mode1 == DFmode;
21606
21607 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
21608 that we can tie it with SFmode. */
21609 if (mode2 == DFmode)
21610 return mode1 == SFmode;
21611
21612 /* If MODE2 is only appropriate for an SSE register, then tie with
21613 any vector modes or scalar floating point modes acceptable to SSE
21614 registers, excluding scalar integer modes with SUBREG:
21615 (subreg:QI (reg:TI 99) 0))
21616 (subreg:HI (reg:TI 99) 0))
21617 (subreg:SI (reg:TI 99) 0))
21618 (subreg:DI (reg:TI 99) 0))
21619 to avoid unnecessary move from SSE register to integer register.
21620 */
21621 if (GET_MODE_SIZE (mode2) >= 16
21622 && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2)
21623 || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1))
21624 && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2)))
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
21627
21628 /* If MODE2 is appropriate for an MMX register, then tie
21629 with any other mode acceptable to MMX registers. */
21630 if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
21634
21635 /* SCmode and DImode can be tied. */
21636 if ((mode1 == E_SCmode && mode2 == E_DImode)
21637 || (mode1 == E_DImode && mode2 == E_SCmode))
21638 return TARGET_64BIT;
21639
21640 /* [SD]Cmode and V2[SD]Fmode modes can be tied. */
21641 if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
21642 || (mode1 == E_V2SFmode && mode2 == E_SCmode)
21643 || (mode1 == E_DCmode && mode2 == E_V2DFmode)
21644 || (mode1 == E_V2DFmode && mode2 == E_DCmode))
21645 return true;
21646
21647 return false;
21648}
21649
21650/* Return the cost of moving between two registers of mode MODE. */
21651
21652static int
21653ix86_set_reg_reg_cost (machine_mode mode)
21654{
21655 unsigned int units = UNITS_PER_WORD;
21656
21657 switch (GET_MODE_CLASS (mode))
21658 {
21659 default:
21660 break;
21661
21662 case MODE_CC:
21663 units = GET_MODE_SIZE (CCmode);
21664 break;
21665
21666 case MODE_FLOAT:
21667 if ((TARGET_SSE && mode == TFmode)
21668 || (TARGET_80387 && mode == XFmode)
21669 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
21670 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
21671 units = GET_MODE_SIZE (mode);
21672 break;
21673
21674 case MODE_COMPLEX_FLOAT:
21675 if ((TARGET_SSE && mode == TCmode)
21676 || (TARGET_80387 && mode == XCmode)
21677 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
21678 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
21679 units = GET_MODE_SIZE (mode);
21680 break;
21681
21682 case MODE_VECTOR_INT:
21683 case MODE_VECTOR_FLOAT:
21684 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
21685 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
21686 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
21687 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
21688 || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
21689 && VALID_MMX_REG_MODE (mode)))
21690 units = GET_MODE_SIZE (mode);
21691 }
21692
21693 /* Return the cost of moving between two registers of mode MODE,
21694 assuming that the move will be in pieces of at most UNITS bytes. */
21695 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
21696}
21697
21698/* Return cost of vector operation in MODE given that scalar version has
21699 COST. */
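/* E.g. (illustrative): with TARGET_AVX256_SPLIT_REGS a 256-bit V8SFmode
   operation is assumed to be issued as two 128-bit halves, so the
   function below returns 256/128 == 2 times COST.  */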
21700
21701static int
21702ix86_vec_cost (machine_mode mode, int cost)
21703{
21704 if (!VECTOR_MODE_P (mode))
21705 return cost;
21706
21707 if (GET_MODE_BITSIZE (mode) == 128
21708 && TARGET_SSE_SPLIT_REGS)
21709 return cost * GET_MODE_BITSIZE (mode) / 64;
21710 else if (GET_MODE_BITSIZE (mode) > 128
21711 && TARGET_AVX256_SPLIT_REGS)
21712 return cost * GET_MODE_BITSIZE (mode) / 128;
21713 else if (GET_MODE_BITSIZE (mode) > 256
21714 && TARGET_AVX512_SPLIT_REGS)
21715 return cost * GET_MODE_BITSIZE (mode) / 256;
21716 return cost;
21717}
21718
/* Return cost of vec_widen_<s>mult_hi/lo_<mode>;
   vec_widen_<s>mult_hi/lo_<mode> is only available for VI124_AVX2.  */
21721static int
21722ix86_widen_mult_cost (const struct processor_costs *cost,
21723 enum machine_mode mode, bool uns_p)
21724{
21725 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
21726 int extra_cost = 0;
21727 int basic_cost = 0;
21728 switch (mode)
21729 {
21730 case V8HImode:
21731 case V16HImode:
21732 if (!uns_p || mode == V16HImode)
21733 extra_cost = cost->sse_op * 2;
21734 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
21735 break;
21736 case V4SImode:
21737 case V8SImode:
21738 /* pmulhw/pmullw can be used. */
21739 basic_cost = cost->mulss * 2 + cost->sse_op * 2;
21740 break;
21741 case V2DImode:
21742 /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
21743 require extra 4 mul, 4 add, 4 cmp and 2 shift. */
21744 if (!TARGET_SSE4_1 && !uns_p)
21745 extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4
21746 + cost->sse_op * 2;
21747 /* Fallthru. */
21748 case V4DImode:
21749 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
21750 break;
21751 default:
21752 /* Not implemented. */
21753 return 100;
21754 }
  return ix86_vec_cost (mode, basic_cost + extra_cost);
21756}
21757
21758/* Return cost of multiplication in MODE. */
21759
21760static int
21761ix86_multiplication_cost (const struct processor_costs *cost,
21762 enum machine_mode mode)
21763{
21764 machine_mode inner_mode = mode;
21765 if (VECTOR_MODE_P (mode))
21766 inner_mode = GET_MODE_INNER (mode);
21767
21768 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
21769 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
21770 else if (X87_FLOAT_MODE_P (mode))
21771 return cost->fmul;
21772 else if (FLOAT_MODE_P (mode))
    return ix86_vec_cost (mode,
			  inner_mode == DFmode ? cost->mulsd : cost->mulss);
21775 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21776 {
21777 int nmults, nops;
21778 /* Cost of reading the memory. */
21779 int extra;
21780
21781 switch (mode)
21782 {
21783 case V4QImode:
21784 case V8QImode:
21785 /* Partial V*QImode is emulated with 4-6 insns. */
21786 nmults = 1;
21787 nops = 3;
21788 extra = 0;
21789
21790 if (TARGET_AVX512BW && TARGET_AVX512VL)
21791 ;
21792 else if (TARGET_AVX2)
21793 nops += 2;
21794 else if (TARGET_XOP)
21795 extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
21796 else
21797 {
21798 nops += 1;
21799 extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
21800 }
21801 goto do_qimode;
21802
21803 case V16QImode:
21804 /* V*QImode is emulated with 4-11 insns. */
21805 nmults = 1;
21806 nops = 3;
21807 extra = 0;
21808
21809 if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
21810 {
21811 if (!(TARGET_AVX512BW && TARGET_AVX512VL))
21812 nops += 3;
21813 }
21814 else if (TARGET_XOP)
21815 {
21816 nmults += 1;
21817 nops += 2;
21818 extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
21819 }
21820 else
21821 {
21822 nmults += 1;
21823 nops += 4;
21824 extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
21825 }
21826 goto do_qimode;
21827
21828 case V32QImode:
21829 nmults = 1;
21830 nops = 3;
21831 extra = 0;
21832
21833 if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
21834 {
21835 nmults += 1;
21836 nops += 4;
21837 /* 2 loads, so no division by 2. */
21838 extra += COSTS_N_INSNS (cost->sse_load[3]);
21839 }
21840 goto do_qimode;
21841
21842 case V64QImode:
21843 nmults = 2;
21844 nops = 9;
21845 /* 2 loads of each size, so no division by 2. */
21846 extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]);
21847
21848 do_qimode:
	  return ix86_vec_cost (mode, cost->mulss * nmults
				+ cost->sse_op * nops) + extra;
21851
21852 case V4SImode:
21853 /* pmulld is used in this case. No emulation is needed. */
21854 if (TARGET_SSE4_1)
21855 goto do_native;
21856 /* V4SImode is emulated with 7 insns. */
21857 else
	    return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
21859
21860 case V2DImode:
21861 case V4DImode:
21862 /* vpmullq is used in this case. No emulation is needed. */
21863 if (TARGET_AVX512DQ && TARGET_AVX512VL)
21864 goto do_native;
21865 /* V*DImode is emulated with 6-8 insns. */
21866 else if (TARGET_XOP && mode == V2DImode)
	    return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
21868 /* FALLTHRU */
21869 case V8DImode:
21870 /* vpmullq is used in this case. No emulation is needed. */
21871 if (TARGET_AVX512DQ && mode == V8DImode)
21872 goto do_native;
21873 else
	    return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
21875
21876 default:
21877 do_native:
	  return ix86_vec_cost (mode, cost->mulss);
21879 }
21880 }
21881 else
21882 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
21883}
21884
/* Return cost of division in MODE.  */
21886
21887static int
21888ix86_division_cost (const struct processor_costs *cost,
21889 enum machine_mode mode)
21890{
21891 machine_mode inner_mode = mode;
21892 if (VECTOR_MODE_P (mode))
21893 inner_mode = GET_MODE_INNER (mode);
21894
21895 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
21896 return inner_mode == DFmode ? cost->divsd : cost->divss;
21897 else if (X87_FLOAT_MODE_P (mode))
21898 return cost->fdiv;
21899 else if (FLOAT_MODE_P (mode))
    return ix86_vec_cost (mode,
			  inner_mode == DFmode ? cost->divsd : cost->divss);
21902 else
21903 return cost->divide[MODE_INDEX (mode)];
21904}
21905
21906/* Return cost of shift in MODE.
21907 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
   AND_IN_OP1 specifies whether op1 is the result of an AND, and
   SHIFT_AND_TRUNCATE whether op1 is a result of a subreg.
21910
21911 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
21912
21913static int
21914ix86_shift_rotate_cost (const struct processor_costs *cost,
21915 enum rtx_code code,
21916 enum machine_mode mode, bool constant_op1,
21917 HOST_WIDE_INT op1_val,
21918 bool and_in_op1,
21919 bool shift_and_truncate,
21920 bool *skip_op0, bool *skip_op1)
21921{
21922 if (skip_op0)
21923 *skip_op0 = *skip_op1 = false;
21924
21925 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21926 {
21927 int count;
21928 /* Cost of reading the memory. */
21929 int extra;
21930
21931 switch (mode)
21932 {
21933 case V4QImode:
21934 case V8QImode:
21935 if (TARGET_AVX2)
21936 /* Use vpbroadcast. */
21937 extra = cost->sse_op;
21938 else
21939 extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
21940
21941 if (constant_op1)
21942 {
21943 if (code == ASHIFTRT)
21944 {
21945 count = 4;
21946 extra *= 2;
21947 }
21948 else
21949 count = 2;
21950 }
21951 else if (TARGET_AVX512BW && TARGET_AVX512VL)
	    return ix86_vec_cost (mode, cost->sse_op * 4);
21953 else if (TARGET_SSE4_1)
21954 count = 5;
21955 else if (code == ASHIFTRT)
21956 count = 6;
21957 else
21958 count = 5;
	  return ix86_vec_cost (mode, cost->sse_op * count) + extra;
21960
21961 case V16QImode:
21962 if (TARGET_XOP)
21963 {
21964 /* For XOP we use vpshab, which requires a broadcast of the
21965 value to the variable shift insn. For constants this
21966 means a V16Q const in mem; even when we can perform the
21967 shift with one insn set the cost to prefer paddb. */
21968 if (constant_op1)
21969 {
21970 extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
		  return ix86_vec_cost (mode, cost->sse_op) + extra;
21972 }
21973 else
21974 {
21975 count = (code == ASHIFT) ? 3 : 4;
		  return ix86_vec_cost (mode, cost->sse_op * count);
21977 }
21978 }
21979 /* FALLTHRU */
21980 case V32QImode:
21981 if (TARGET_AVX2)
21982 /* Use vpbroadcast. */
21983 extra = cost->sse_op;
21984 else
21985 extra = COSTS_N_INSNS (mode == V16QImode
21986 ? cost->sse_load[2]
21987 : cost->sse_load[3]) / 2;
21988
21989 if (constant_op1)
21990 {
21991 if (code == ASHIFTRT)
21992 {
21993 count = 4;
21994 extra *= 2;
21995 }
21996 else
21997 count = 2;
21998 }
21999 else if (TARGET_AVX512BW
22000 && ((mode == V32QImode && !TARGET_PREFER_AVX256)
22001 || (mode == V16QImode && TARGET_AVX512VL
22002 && !TARGET_PREFER_AVX128)))
	    return ix86_vec_cost (mode, cost->sse_op * 4);
22004 else if (TARGET_AVX2
22005 && mode == V16QImode && !TARGET_PREFER_AVX128)
22006 count = 6;
22007 else if (TARGET_SSE4_1)
22008 count = 9;
22009 else if (code == ASHIFTRT)
22010 count = 10;
22011 else
22012 count = 9;
	  return ix86_vec_cost (mode, cost->sse_op * count) + extra;
22014
22015 case V2DImode:
22016 case V4DImode:
22017 /* V*DImode arithmetic right shift is emulated. */
22018 if (code == ASHIFTRT && !TARGET_AVX512VL)
22019 {
22020 if (constant_op1)
22021 {
22022 if (op1_val == 63)
22023 count = TARGET_SSE4_2 ? 1 : 2;
22024 else if (TARGET_XOP)
22025 count = 2;
22026 else if (TARGET_SSE4_1)
22027 count = 3;
22028 else
22029 count = 4;
22030 }
22031 else if (TARGET_XOP)
22032 count = 3;
22033 else if (TARGET_SSE4_2)
22034 count = 4;
22035 else
22036 count = 5;
22037
	      return ix86_vec_cost (mode, cost->sse_op * count);
22039 }
22040 /* FALLTHRU */
22041 default:
	  return ix86_vec_cost (mode, cost->sse_op);
22043 }
22044 }
22045
22046 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22047 {
22048 if (constant_op1)
22049 {
22050 if (op1_val > 32)
22051 return cost->shift_const + COSTS_N_INSNS (2);
22052 else
22053 return cost->shift_const * 2;
22054 }
22055 else
22056 {
22057 if (and_in_op1)
22058 return cost->shift_var * 2;
22059 else
22060 return cost->shift_var * 6 + COSTS_N_INSNS (2);
22061 }
22062 }
22063 else
22064 {
22065 if (constant_op1)
22066 return cost->shift_const;
22067 else if (shift_and_truncate)
22068 {
22069 if (skip_op0)
22070 *skip_op0 = *skip_op1 = true;
22071 /* Return the cost after shift-and truncation. */
22072 return cost->shift_var;
22073 }
22074 else
22075 return cost->shift_var;
22076 }
22077}
22078
22079static int
22080ix86_insn_cost (rtx_insn *insn, bool speed)
22081{
22082 int insn_cost = 0;
  /* Add extra cost to avoid post_reload late_combine reverting
     the optimization done in pass_rpad.  */
22085 if (reload_completed
22086 && ix86_rpad_gate ()
22087 && recog_memoized (insn) >= 0
22088 && get_attr_avx_partial_xmm_update (insn)
22089 == AVX_PARTIAL_XMM_UPDATE_TRUE)
22090 insn_cost += COSTS_N_INSNS (3);
22091
22092 return insn_cost + pattern_cost (PATTERN (insn), speed);
22093}
22094
22095/* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */
22096
22097static int
22098vec_fp_conversion_cost (const struct processor_costs *cost, int size)
22099{
22100 if (size < 128)
22101 return cost->cvtss2sd;
22102 else if (size < 256)
22103 {
22104 if (TARGET_SSE_SPLIT_REGS)
22105 return cost->cvtss2sd * size / 64;
22106 return cost->cvtss2sd;
22107 }
22108 if (size < 512)
22109 return cost->vcvtps2pd256;
22110 else
22111 return cost->vcvtps2pd512;
22112}
22113
/* Return true if X is an UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP.  */
22115
22116static bool
22117unspec_pcmp_p (rtx x)
22118{
22119 return GET_CODE (x) == UNSPEC
22120 && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP);
22121}
22122
22123/* Compute a (partial) cost for rtx X. Return true if the complete
22124 cost has been computed, and false if subexpressions should be
22125 scanned. In either case, *TOTAL contains the cost result. */
22126
22127static bool
22128ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
22129 int *total, bool speed)
22130{
22131 rtx mask;
22132 enum rtx_code code = GET_CODE (x);
22133 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22134 const struct processor_costs *cost
22135 = speed ? ix86_tune_cost : &ix86_size_cost;
22136 int src_cost;
22137
22138 /* Handling different vternlog variants. */
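  /* Such a pattern folds a three-operand bitwise expression into a single
     vpternlog instruction, so it is costed as one sse_op plus the cost of
     the distinct operands that ix86_ternlog_idx extracts.  */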
22139 if ((GET_MODE_SIZE (mode) == 64
22140 ? TARGET_AVX512F
22141 : (TARGET_AVX512VL
22142 || (TARGET_AVX512F && !TARGET_PREFER_AVX256)))
22143 && GET_MODE_SIZE (mode) >= 16
22144 && outer_code_i == SET
22145 && ternlog_operand (x, mode))
22146 {
22147 rtx args[3];
22148
22149 args[0] = NULL_RTX;
22150 args[1] = NULL_RTX;
22151 args[2] = NULL_RTX;
22152 int idx = ix86_ternlog_idx (x, args);
22153 gcc_assert (idx >= 0);
22154
22155 *total = cost->sse_op;
22156 for (int i = 0; i != 3; i++)
22157 if (args[i])
22158 *total += rtx_cost (args[i], GET_MODE (args[i]), UNSPEC, i, speed);
22159 return true;
22160 }
22161
22162
22163 switch (code)
22164 {
22165 case SET:
22166 if (register_operand (SET_DEST (x), VOIDmode)
22167 && register_operand (SET_SRC (x), VOIDmode))
22168 {
22169 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
22170 return true;
22171 }
22172
22173 if (register_operand (SET_SRC (x), VOIDmode))
22174 /* Avoid potentially incorrect high cost from rtx_costs
22175 for non-tieable SUBREGs. */
22176 src_cost = 0;
22177 else
22178 {
22179 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
22180
22181 if (CONSTANT_P (SET_SRC (x)))
22182 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
22183 a small value, possibly zero for cheap constants. */
22184 src_cost += COSTS_N_INSNS (1);
22185 }
22186
22187 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
22188 return true;
22189
22190 case CONST_INT:
22191 case CONST:
22192 case LABEL_REF:
22193 case SYMBOL_REF:
22194 if (x86_64_immediate_operand (x, VOIDmode))
22195 *total = 0;
22196 else if (TARGET_64BIT && x86_64_zext_immediate_operand (x, VOIDmode))
22197 /* Consider the zext constants slightly more expensive, as they
22198 can't appear in most instructions. */
22199 *total = 1;
22200 else
22201 /* movabsq is slightly more expensive than a simple instruction. */
22202 *total = COSTS_N_INSNS (1) + 1;
22203 return true;
22204
22205 case CONST_DOUBLE:
22206 if (IS_STACK_MODE (mode))
22207 switch (standard_80387_constant_p (x))
22208 {
22209 case -1:
22210 case 0:
22211 break;
22212 case 1: /* 0.0 */
22213 *total = 1;
22214 return true;
22215 default: /* Other constants */
22216 *total = 2;
22217 return true;
22218 }
22219 /* FALLTHRU */
22220
22221 case CONST_VECTOR:
22222 switch (standard_sse_constant_p (x, mode))
22223 {
22224 case 0:
22225 break;
22226 case 1: /* 0: xor eliminates false dependency */
22227 *total = 0;
22228 return true;
22229 default: /* -1: cmp contains false dependency */
22230 *total = 1;
22231 return true;
22232 }
22233 /* FALLTHRU */
22234
22235 case CONST_WIDE_INT:
22236 /* Fall back to (MEM (SYMBOL_REF)), since that's where
22237 it'll probably end up. Add a penalty for size. */
22238 *total = (COSTS_N_INSNS (1)
22239 + (!TARGET_64BIT && flag_pic)
22240 + (GET_MODE_SIZE (mode) <= 4
22241 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
22242 return true;
22243
22244 case ZERO_EXTEND:
22245 /* Zero extension is often completely free on x86_64, so make
22246 it as cheap as possible. */
22247 if (TARGET_64BIT && mode == DImode
22248 && GET_MODE (XEXP (x, 0)) == SImode)
22249 *total = 1;
22250 else if (TARGET_ZERO_EXTEND_WITH_AND)
22251 *total = cost->add;
22252 else
22253 *total = cost->movzx;
22254 return false;
22255
22256 case SIGN_EXTEND:
22257 *total = cost->movsx;
22258 return false;
22259
22260 case ASHIFT:
22261 if (SCALAR_INT_MODE_P (mode)
22262 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
22263 && CONST_INT_P (XEXP (x, 1)))
22264 {
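	  /* Small constant left shifts can be synthesized without a real
	     shifter: a shift by 1 is just an add of the operand to itself,
	     and shifts by 2 or 3 map to an lea with scale 4 or 8 whenever
	     lea is no more expensive than a constant shift.  */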
22265 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22266 if (value == 1)
22267 {
22268 *total = cost->add;
22269 return false;
22270 }
22271 if ((value == 2 || value == 3)
22272 && cost->lea <= cost->shift_const)
22273 {
22274 *total = cost->lea;
22275 return false;
22276 }
22277 }
22278 /* FALLTHRU */
22279
22280 case ROTATE:
22281 case ASHIFTRT:
22282 case LSHIFTRT:
22283 case ROTATERT:
22284 bool skip_op0, skip_op1;
22285 *total = ix86_shift_rotate_cost (cost, code, mode,
22286 CONSTANT_P (XEXP (x, 1)),
22287 CONST_INT_P (XEXP (x, 1))
22288 ? INTVAL (XEXP (x, 1)) : -1,
22289 GET_CODE (XEXP (x, 1)) == AND,
22290 SUBREG_P (XEXP (x, 1))
22291 && GET_CODE (XEXP (XEXP (x, 1),
22292 0)) == AND,
22293 &skip_op0, &skip_op1);
22294 if (skip_op0 || skip_op1)
22295 {
22296 if (!skip_op0)
22297 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22298 if (!skip_op1)
22299 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
22300 return true;
22301 }
22302 return false;
22303
22304 case FMA:
22305 {
22306 rtx sub;
22307
22308 gcc_assert (FLOAT_MODE_P (mode));
22309 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
22310
22311 *total = ix86_vec_cost (mode,
22312 GET_MODE_INNER (mode) == SFmode
22313 ? cost->fmass : cost->fmasd);
22314 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
22315
22316 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
22317 sub = XEXP (x, 0);
22318 if (GET_CODE (sub) == NEG)
22319 sub = XEXP (sub, 0);
22320 *total += rtx_cost (sub, mode, FMA, 0, speed);
22321
22322 sub = XEXP (x, 2);
22323 if (GET_CODE (sub) == NEG)
22324 sub = XEXP (sub, 0);
22325 *total += rtx_cost (sub, mode, FMA, 2, speed);
22326 return true;
22327 }
22328
22329 case MULT:
22330 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
22331 {
22332 rtx op0 = XEXP (x, 0);
22333 rtx op1 = XEXP (x, 1);
22334 int nbits;
22335 if (CONST_INT_P (XEXP (x, 1)))
22336 {
22337 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22338 for (nbits = 0; value != 0; value &= value - 1)
22339 nbits++;
22340 }
22341 else
22342 /* This is arbitrary. */
22343 nbits = 7;
22344
22345 /* Compute costs correctly for widening multiplication. */
22346 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22347 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22348 == GET_MODE_SIZE (mode))
22349 {
22350 int is_mulwiden = 0;
22351 machine_mode inner_mode = GET_MODE (op0);
22352
22353 if (GET_CODE (op0) == GET_CODE (op1))
22354 is_mulwiden = 1, op1 = XEXP (op1, 0);
22355 else if (CONST_INT_P (op1))
22356 {
22357 if (GET_CODE (op0) == SIGN_EXTEND)
22358 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22359 == INTVAL (op1);
22360 else
22361 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22362 }
22363
22364 if (is_mulwiden)
22365 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22366 }
22367
22368 int mult_init;
22369 // Double word multiplication requires 3 mults and 2 adds.
22370 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22371 {
22372 mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
22373 + 2 * cost->add;
22374 nbits *= 3;
22375 }
22376 else mult_init = cost->mult_init[MODE_INDEX (mode)];
22377
22378 *total = (mult_init
22379 + nbits * cost->mult_bit
22380 + rtx_cost (op0, mode, outer_code, opno, speed)
22381 + rtx_cost (op1, mode, outer_code, opno, speed));
22382
22383 return true;
22384 }
22385 *total = ix86_multiplication_cost (cost, mode);
22386 return false;
22387
22388 case DIV:
22389 case UDIV:
22390 case MOD:
22391 case UMOD:
22392 *total = ix86_division_cost (cost, mode);
22393 return false;
22394
22395 case PLUS:
22396 if (GET_MODE_CLASS (mode) == MODE_INT
22397 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
22398 {
22399 if (GET_CODE (XEXP (x, 0)) == PLUS
22400 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22401 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22402 && CONSTANT_P (XEXP (x, 1)))
22403 {
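	      /* This shape, (plus (plus (mult reg {2,4,8}) reg) constant),
		 is exactly the base + index*scale + disp form a single lea
		 can compute, so cost it as one lea plus its operands.  */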
22404 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22405 if (val == 2 || val == 4 || val == 8)
22406 {
22407 *total = cost->lea;
22408 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22409 outer_code, opno, speed);
22410 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
22411 outer_code, opno, speed);
22412 *total += rtx_cost (XEXP (x, 1), mode,
22413 outer_code, opno, speed);
22414 return true;
22415 }
22416 }
22417 else if (GET_CODE (XEXP (x, 0)) == MULT
22418 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22419 {
22420 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22421 if (val == 2 || val == 4 || val == 8)
22422 {
22423 *total = cost->lea;
22424 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22425 outer_code, opno, speed);
22426 *total += rtx_cost (XEXP (x, 1), mode,
22427 outer_code, opno, speed);
22428 return true;
22429 }
22430 }
22431 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22432 {
22433 rtx op = XEXP (XEXP (x, 0), 0);
22434
22435 /* Add with carry, ignore the cost of adding a carry flag. */
22436 if (ix86_carry_flag_operator (op, mode)
22437 || ix86_carry_flag_unset_operator (op, mode))
22438 *total = cost->add;
22439 else
22440 {
22441 *total = cost->lea;
22442 *total += rtx_cost (op, mode,
22443 outer_code, opno, speed);
22444 }
22445
22446 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22447 outer_code, opno, speed);
22448 *total += rtx_cost (XEXP (x, 1), mode,
22449 outer_code, opno, speed);
22450 return true;
22451 }
22452 }
22453 /* FALLTHRU */
22454
22455 case MINUS:
22456 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
22457 if (GET_MODE_CLASS (mode) == MODE_INT
22458 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
22459 && GET_CODE (XEXP (x, 0)) == MINUS
22460 && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
22461 || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
22462 {
22463 *total = cost->add;
22464 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22465 outer_code, opno, speed);
22466 *total += rtx_cost (XEXP (x, 1), mode,
22467 outer_code, opno, speed);
22468 return true;
22469 }
22470
22471 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22472 *total = cost->addss;
22473 else if (X87_FLOAT_MODE_P (mode))
22474 *total = cost->fadd;
22475 else if (FLOAT_MODE_P (mode))
22476 *total = ix86_vec_cost (mode, cost->addss);
22477 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22478 *total = ix86_vec_cost (mode, cost->sse_op);
22479 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22480 *total = cost->add * 2;
22481 else
22482 *total = cost->add;
22483 return false;
22484
22485 case IOR:
22486 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22487 || SSE_FLOAT_MODE_P (mode))
22488 {
22489 /* (ior (not ...) ...) can be a single insn in AVX512. */
22490 if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
22491 && (GET_MODE_SIZE (mode) == 64
22492 || (TARGET_AVX512VL
22493 && (GET_MODE_SIZE (mode) == 32
22494 || GET_MODE_SIZE (mode) == 16))))
22495 {
22496 rtx right = GET_CODE (XEXP (x, 1)) != NOT
22497 ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
22498
22499 *total = ix86_vec_cost (mode, cost->sse_op)
22500 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22501 outer_code, opno, speed)
22502 + rtx_cost (right, mode, outer_code, opno, speed);
22503 return true;
22504 }
22505 *total = ix86_vec_cost (mode, cost->sse_op);
22506 }
22507 else if (TARGET_64BIT
22508 && mode == TImode
22509 && GET_CODE (XEXP (x, 0)) == ASHIFT
22510 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
22511 && GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode
22512 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
22513 && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
22514 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
22515 && GET_MODE (XEXP (XEXP (x, 1), 0)) == DImode)
22516 {
22517 /* *concatditi3 is cheap. */
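	  /* That is, (ior (ashift (zero_extend:TI hi) 64) (zero_extend:TI lo))
	     merely glues two DImode halves into a TImode value, so charge only
	     the cost of producing each half (or a movq when a half comes from
	     a DFmode subreg).  */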
22518 rtx op0 = XEXP (XEXP (XEXP (x, 0), 0), 0);
22519 rtx op1 = XEXP (XEXP (x, 1), 0);
22520 *total = (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == DFmode)
22521 ? COSTS_N_INSNS (1) /* movq. */
22522 : set_src_cost (op0, DImode, speed);
22523 *total += (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == DFmode)
22524 ? COSTS_N_INSNS (1) /* movq. */
22525 : set_src_cost (op1, DImode, speed);
22526 return true;
22527 }
22528 else if (TARGET_64BIT
22529 && mode == TImode
22530 && GET_CODE (XEXP (x, 0)) == AND
22531 && REG_P (XEXP (XEXP (x, 0), 0))
22532 && CONST_WIDE_INT_P (XEXP (XEXP (x, 0), 1))
22533 && CONST_WIDE_INT_NUNITS (XEXP (XEXP (x, 0), 1)) == 2
22534 && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 0) == -1
22535 && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 1) == 0
22536 && GET_CODE (XEXP (x, 1)) == ASHIFT
22537 && GET_CODE (XEXP (XEXP (x, 1), 0)) == ZERO_EXTEND
22538 && GET_MODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == DImode
22539 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
22540 && INTVAL (XEXP (XEXP (x, 1), 1)) == 64)
22541 {
22542 /* *insvti_highpart is cheap. */
22543 rtx op = XEXP (XEXP (XEXP (x, 1), 0), 0);
22544 *total = COSTS_N_INSNS (1) + 1;
22545 *total += (SUBREG_P (op) && GET_MODE (SUBREG_REG (op)) == DFmode)
22546 ? COSTS_N_INSNS (1) /* movq. */
22547 : set_src_cost (op, DImode, speed);
22548 return true;
22549 }
22550 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22551 *total = cost->add * 2;
22552 else
22553 *total = cost->add;
22554 return false;
22555
22556 case XOR:
22557 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22558 || SSE_FLOAT_MODE_P (mode))
22559 *total = ix86_vec_cost (mode, cost->sse_op);
22560 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22561 *total = cost->add * 2;
22562 else
22563 *total = cost->add;
22564 return false;
22565
22566 case AND:
22567 if (address_no_seg_operand (x, mode))
22568 {
22569 *total = cost->lea;
22570 return true;
22571 }
22572 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22573 || SSE_FLOAT_MODE_P (mode))
22574 {
22575 /* pandn is a single instruction. */
22576 if (GET_CODE (XEXP (x, 0)) == NOT)
22577 {
22578 rtx right = XEXP (x, 1);
22579
22580 /* (and (not ...) (not ...)) can be a single insn in AVX512. */
22581 if (GET_CODE (right) == NOT && TARGET_AVX512F
22582 && (GET_MODE_SIZE (mode) == 64
22583 || (TARGET_AVX512VL
22584 && (GET_MODE_SIZE (mode) == 32
22585 || GET_MODE_SIZE (mode) == 16))))
22586 right = XEXP (right, 0);
22587
22588 *total = ix86_vec_cost (mode, cost->sse_op)
22589 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22590 outer_code, opno, speed)
22591 + rtx_cost (right, mode, outer_code, opno, speed);
22592 return true;
22593 }
22594 else if (GET_CODE (XEXP (x, 1)) == NOT)
22595 {
22596 *total = ix86_vec_cost (mode, cost->sse_op)
22597 + rtx_cost (XEXP (x, 0), mode,
22598 outer_code, opno, speed)
22599 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22600 outer_code, opno, speed);
22601 return true;
22602 }
22603 *total = ix86_vec_cost (mode, cost->sse_op);
22604 }
22605 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22606 {
22607 if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
22608 {
22609 *total = cost->add * 2
22610 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22611 outer_code, opno, speed)
22612 + rtx_cost (XEXP (x, 1), mode,
22613 outer_code, opno, speed);
22614 return true;
22615 }
22616 else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
22617 {
22618 *total = cost->add * 2
22619 + rtx_cost (XEXP (x, 0), mode,
22620 outer_code, opno, speed)
22621 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22622 outer_code, opno, speed);
22623 return true;
22624 }
22625 *total = cost->add * 2;
22626 }
22627 else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
22628 {
22629 *total = cost->add
22630 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22631 outer_code, opno, speed)
22632 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
22633 return true;
22634 }
22635 else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
22636 {
22637 *total = cost->add
22638 + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
22639 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22640 outer_code, opno, speed);
22641 return true;
22642 }
22643 else
22644 *total = cost->add;
22645 return false;
22646
22647 case NOT:
22648 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22649 {
22650 /* (not (xor ...)) can be a single insn in AVX512. */
22651 if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
22652 && (GET_MODE_SIZE (mode) == 64
22653 || (TARGET_AVX512VL
22654 && (GET_MODE_SIZE (mode) == 32
22655 || GET_MODE_SIZE (mode) == 16))))
22656 {
22657 *total = ix86_vec_cost (mode, cost->sse_op)
22658 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22659 outer_code, opno, speed)
22660 + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22661 outer_code, opno, speed);
22662 return true;
22663 }
22664
22665 // vnot is pxor -1.
22666 *total = ix86_vec_cost (mode, cost->sse_op) + 1;
22667 }
22668 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22669 *total = cost->add * 2;
22670 else
22671 *total = cost->add;
22672 return false;
22673
22674 case NEG:
22675 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22676 *total = cost->sse_op;
22677 else if (X87_FLOAT_MODE_P (mode))
22678 *total = cost->fchs;
22679 else if (FLOAT_MODE_P (mode))
22680 *total = ix86_vec_cost (mode, cost->sse_op);
22681 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22682 *total = ix86_vec_cost (mode, cost->sse_op);
22683 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22684 *total = cost->add * 3;
22685 else
22686 *total = cost->add;
22687 return false;
22688
22689 case COMPARE:
22690 rtx op0, op1;
22691 op0 = XEXP (x, 0);
22692 op1 = XEXP (x, 1);
22693 if (GET_CODE (op0) == ZERO_EXTRACT
22694 && XEXP (op0, 1) == const1_rtx
22695 && CONST_INT_P (XEXP (op0, 2))
22696 && op1 == const0_rtx)
22697 {
22698 /* This kind of construct is implemented using test[bwl].
22699 Treat it as if we had an AND. */
22700 mode = GET_MODE (XEXP (op0, 0));
22701 *total = (cost->add
22702 + rtx_cost (XEXP (op0, 0), mode, outer_code,
22703 opno, speed)
22704 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
22705 return true;
22706 }
22707
22708 if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
22709 {
22710 /* This is an overflow detection, count it as a normal compare. */
22711 *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
22712 return true;
22713 }
22714
22715 rtx geu;
22716 /* Match x
22717 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
22718 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
22719 if (mode == CCCmode
22720 && GET_CODE (op0) == NEG
22721 && GET_CODE (geu = XEXP (op0, 0)) == GEU
22722 && REG_P (XEXP (geu, 0))
22723 && (GET_MODE (XEXP (geu, 0)) == CCCmode
22724 || GET_MODE (XEXP (geu, 0)) == CCmode)
22725 && REGNO (XEXP (geu, 0)) == FLAGS_REG
22726 && XEXP (geu, 1) == const0_rtx
22727 && GET_CODE (op1) == LTU
22728 && REG_P (XEXP (op1, 0))
22729 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
22730 && REGNO (XEXP (op1, 0)) == FLAGS_REG
22731 && XEXP (op1, 1) == const0_rtx)
22732 {
22733 /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
22734 *total = 0;
22735 return true;
22736 }
22737 /* Match x
22738 (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
22739 (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */
22740 if (mode == CCCmode
22741 && GET_CODE (op0) == NEG
22742 && GET_CODE (XEXP (op0, 0)) == LTU
22743 && REG_P (XEXP (XEXP (op0, 0), 0))
22744 && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
22745 && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
22746 && XEXP (XEXP (op0, 0), 1) == const0_rtx
22747 && GET_CODE (op1) == GEU
22748 && REG_P (XEXP (op1, 0))
22749 && GET_MODE (XEXP (op1, 0)) == CCCmode
22750 && REGNO (XEXP (op1, 0)) == FLAGS_REG
22751 && XEXP (op1, 1) == const0_rtx)
22752 {
22753 /* This is *x86_cmc. */
22754 if (!speed)
22755 *total = COSTS_N_BYTES (1);
22756 else if (TARGET_SLOW_STC)
22757 *total = COSTS_N_INSNS (2);
22758 else
22759 *total = COSTS_N_INSNS (1);
22760 return true;
22761 }
22762
22763 if (SCALAR_INT_MODE_P (GET_MODE (op0))
22764 && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
22765 {
22766 if (op1 == const0_rtx)
22767 *total = cost->add
22768 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
22769 else
22770 *total = 3*cost->add
22771 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
22772 + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
22773 return true;
22774 }
22775
22776 /* The embedded comparison operand is completely free. */
22777 if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
22778 *total = 0;
22779
22780 return false;
22781
22782 case FLOAT_EXTEND:
22783 /* x87 represents all values extended to 80bit. */
22784 if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22785 *total = 0;
22786 else
22787 *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
22788 return false;
22789
22790 case FLOAT_TRUNCATE:
22791 if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22792 *total = cost->fadd;
22793 else
22794 *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
22795 return false;
22796 case FLOAT:
22797 case UNSIGNED_FLOAT:
22798 if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22799 /* TODO: We do not have cost tables for x87. */
22800 *total = cost->fadd;
22801 else if (VECTOR_MODE_P (mode))
22802 *total = ix86_vec_cost (mode, cost->cvtpi2ps);
22803 else
22804 *total = cost->cvtsi2ss;
22805 return false;
22806
22807 case FIX:
22808 case UNSIGNED_FIX:
22809 if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22810 /* TODO: We do not have cost tables for x87. */
22811 *total = cost->fadd;
22812 else if (VECTOR_MODE_P (mode))
22813 *total = ix86_vec_cost (mode, cost->cvtps2pi);
22814 else
22815 *total = cost->cvtss2si;
22816 return false;
22817
22818 case ABS:
22819 /* SSE requires memory load for the constant operand. It may make
22820 sense to account for this. Of course the constant operand may or
22821 may not be reused. */
22822 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22823 *total = cost->sse_op;
22824 else if (X87_FLOAT_MODE_P (mode))
22825 *total = cost->fabs;
22826 else if (FLOAT_MODE_P (mode))
22827 *total = ix86_vec_cost (mode, cost->sse_op);
22828 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22829 *total = cost->sse_op;
22830 return false;
22831
22832 case SQRT:
22833 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
22834 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
22835 else if (X87_FLOAT_MODE_P (mode))
22836 *total = cost->fsqrt;
22837 else if (FLOAT_MODE_P (mode))
22838 *total = ix86_vec_cost (mode,
22839 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
22840 return false;
22841
22842 case UNSPEC:
22843 if (XINT (x, 1) == UNSPEC_TP)
22844 *total = 0;
22845 else if (XINT (x, 1) == UNSPEC_VTERNLOG)
22846 {
22847 *total = cost->sse_op;
22848 *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
22849 *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
22850 *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
22851 return true;
22852 }
22853 else if (XINT (x, 1) == UNSPEC_PTEST)
22854 {
22855 *total = cost->sse_op;
22856 rtx test_op0 = XVECEXP (x, 0, 0);
22857 if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
22858 return false;
22859 if (GET_CODE (test_op0) == AND)
22860 {
22861 rtx and_op0 = XEXP (test_op0, 0);
22862 if (GET_CODE (and_op0) == NOT)
22863 and_op0 = XEXP (and_op0, 0);
22864 *total += rtx_cost (and_op0, GET_MODE (and_op0),
22865 AND, 0, speed)
22866 + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
22867 AND, 1, speed);
22868 }
22869 else
22870 *total = rtx_cost (test_op0, GET_MODE (test_op0),
22871 UNSPEC, 0, speed);
22872 return true;
22873 }
22874 return false;
22875
22876 case VEC_CONCAT:
22877 /* ??? Assume all of these vector manipulation patterns are
22878 recognizable. In which case they all pretty much have the
22879 same cost.
22880 ??? We should still recurse when computing cost. */
22881 *total = cost->sse_op;
22882 return true;
22883
22884 case VEC_SELECT:
22885 /* Special case extracting lower part from the vector.
22886 This by itself needs no code, and most SSE/AVX instructions have
22887 packed and single forms where the single form may be represented
22888 by such VEC_SELECT.
22889
22890 Use cost 1 (despite the fact that functionally equivalent SUBREG has
22891 cost 0). Making VEC_SELECT completely free, for example instructs CSE
22892 to forward propagate VEC_SELECT into
22893
22894 (set (reg eax) (reg src))
22895
22896 which then prevents fwprop and combining. See e.g.
22897 gcc.target/i386/pr91103-1.c.
22898
22899 ??? rtvec_series_p test should be, for valid patterns, equivalent to
22900 vec_series_lowpart_p but is not, since the latter calls
22901 can_change_mode_class on ALL_REGS and this returns false since x87 does
22902 not support subregs at all. */
22903 if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
22904 *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
22905 outer_code, opno, speed) + 1;
22906 else
22907 /* ??? We should still recurse when computing cost. */
22908 *total = cost->sse_op;
22909 return true;
22910
22911 case VEC_DUPLICATE:
22912 *total = rtx_cost (XEXP (x, 0),
22913 GET_MODE (XEXP (x, 0)),
22914 VEC_DUPLICATE, 0, speed);
22915 /* It's a broadcast instruction, not an embedded broadcast. */
22916 if (outer_code == SET)
22917 *total += cost->sse_op;
22918
22919 return true;
22920
22921 case VEC_MERGE:
22922 mask = XEXP (x, 2);
22923 /* Scalar versions of SSE instructions may be represented as:
22924
22925 (vec_merge (vec_duplicate (operation ....))
22926 (register or memory)
22927 (const_int 1))
22928
22929 In this case the vec_merge and vec_duplicate are free.
22930 Just recurse into operation and second operand. */
22931 if (mask == const1_rtx
22932 && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
22933 {
22934 *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22935 outer_code, opno, speed)
22936 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
22937 return true;
22938 }
22939 /* This is a masked instruction; assume the same cost
22940 as the nonmasked variant. */
22941 else if (TARGET_AVX512F
22942 && (register_operand (mask, GET_MODE (mask))
22943 /* Redundant cleanup of high bits for kmask with VL=2/4,
22944 i.e. (vec_merge op0, op1, (and op3 15)). */
22945 || (GET_CODE (mask) == AND
22946 && register_operand (XEXP (mask, 0), GET_MODE (mask))
22947 && CONST_INT_P (XEXP (mask, 1))
22948 && ((INTVAL (XEXP (mask, 1)) == 3
22949 && GET_MODE_NUNITS (mode) == 2)
22950 || (INTVAL (XEXP (mask, 1)) == 15
22951 && GET_MODE_NUNITS (mode) == 4)))))
22952 {
22953 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
22954 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
22955 return true;
22956 }
22957 /* Combination of the two above:
22958
22959 (vec_merge (vec_merge (vec_duplicate (operation ...))
22960 (register or memory)
22961 (reg:QI mask))
22962 (register or memory)
22963 (const_int 1))
22964
22965 i.e. avx512fp16_vcvtss2sh_mask. */
22966 else if (TARGET_AVX512F
22967 && mask == const1_rtx
22968 && GET_CODE (XEXP (x, 0)) == VEC_MERGE
22969 && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE
22970 && register_operand (XEXP (XEXP (x, 0), 2),
22971 GET_MODE (XEXP (XEXP (x, 0), 2))))
22972 {
22973 *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22974 mode, outer_code, opno, speed)
22975 + rtx_cost (XEXP (XEXP (x, 0), 1),
22976 mode, outer_code, opno, speed)
22977 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
22978 return true;
22979 }
22980 /* vcmp. */
22981 else if (unspec_pcmp_p (mask)
22982 || (GET_CODE (mask) == NOT
22983 && unspec_pcmp_p (XEXP (mask, 0))))
22984 {
22985 rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask;
22986 rtx unsop0 = XVECEXP (uns, 0, 0);
22987 /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0)
22988 cost the same as register.
22989 This is used by avx_cmp<mode>3_ltint_not. */
22990 if (GET_CODE (unsop0) == SUBREG)
22991 unsop0 = XEXP (unsop0, 0);
22992 if (GET_CODE (unsop0) == NOT)
22993 unsop0 = XEXP (unsop0, 0);
22994 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
22995 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
22996 + rtx_cost (unsop0, mode, UNSPEC, opno, speed)
22997 + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed)
22998 + cost->sse_op;
22999 return true;
23000 }
23001 else
23002 *total = cost->sse_op;
23003 return false;
23004
23005 case MEM:
23006 /* A CONST_VECTOR_DUPLICATE_P constant in the constant pool is just a
23007 broadcast, or one of the variants in ix86_vector_duplicate_simode_const. */
23008
23009 if (GET_MODE_SIZE (mode) >= 16
23010 && VECTOR_MODE_P (mode)
23011 && SYMBOL_REF_P (XEXP (x, 0))
23012 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))
23013 && ix86_broadcast_from_constant (mode, x))
23014 {
23015 *total = COSTS_N_INSNS (2) + speed;
23016 return true;
23017 }
23018
23019 /* An insn that accesses memory is slightly more expensive
23020 than one that does not. */
23021 if (speed)
23022 {
23023 *total += 1;
23024 rtx addr = XEXP (x, 0);
23025 /* For MEM, rtx_cost iterates over each subrtx and adds up the costs,
23026 so for MEM (reg) and MEM (reg + 4) the former costs 5 and the
23027 latter costs 9, which is not accurate for x86. Ideally
23028 address_cost should be used, but it reduces the cost too much.
23029 So the current solution is to make a constant disp as cheap as possible. */
23030 if (GET_CODE (addr) == PLUS
23031 && x86_64_immediate_operand (XEXP (addr, 1), Pmode)
23032 /* Only handle (reg + disp) since other forms of addr are mostly LEA;
23033 there's no additional cost for the plus of disp. */
23034 && register_operand (XEXP (addr, 0), Pmode))
23035 {
23036 *total += 1;
23037 *total += rtx_cost (XEXP (addr, 0), Pmode, PLUS, 0, speed);
23038 return true;
23039 }
23040 }
23041
23042 return false;
23043
23044 case ZERO_EXTRACT:
23045 if (XEXP (x, 1) == const1_rtx
23046 && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
23047 && GET_MODE (XEXP (x, 2)) == SImode
23048 && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
23049 {
23050 /* Ignore cost of zero extension and masking of last argument. */
23051 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23052 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23053 *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
23054 return true;
23055 }
23056 return false;
23057
23058 case IF_THEN_ELSE:
23059 if (TARGET_XOP
23060 && VECTOR_MODE_P (mode)
23061 && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
23062 {
23063 /* vpcmov. */
23064 *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
23065 if (!REG_P (XEXP (x, 0)))
23066 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23067 if (!REG_P (XEXP (x, 1)))
23068 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23069 if (!REG_P (XEXP (x, 2)))
23070 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
23071 return true;
23072 }
23073 else if (TARGET_CMOVE
23074 && SCALAR_INT_MODE_P (mode)
23075 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
23076 {
23077 /* cmov. */
23078 *total = COSTS_N_INSNS (1);
23079 if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
23080 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
23081 if (!REG_P (XEXP (x, 1)))
23082 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
23083 if (!REG_P (XEXP (x, 2)))
23084 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
23085 return true;
23086 }
23087 return false;
23088
23089 default:
23090 return false;
23091 }
23092}
23093
23094#if TARGET_MACHO
23095
23096static int current_machopic_label_num;
23097
23098/* Given a symbol name and its associated stub, write out the
23099 definition of the stub. */
23100
23101void
23102machopic_output_stub (FILE *file, const char *symb, const char *stub)
23103{
23104 unsigned int length;
23105 char *binder_name, *symbol_name, lazy_ptr_name[32];
23106 int label = ++current_machopic_label_num;
23107
23108 /* For 64-bit we shouldn't get here. */
23109 gcc_assert (!TARGET_64BIT);
23110
23111 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
23112 symb = targetm.strip_name_encoding (symb);
23113
23114 length = strlen (stub);
23115 binder_name = XALLOCAVEC (char, length + 32);
23116 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
23117
23118 length = strlen (symb);
23119 symbol_name = XALLOCAVEC (char, length + 32);
23120 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
23121
23122 sprintf (lazy_ptr_name, "L%d$lz", label);
23123
23124 if (MACHOPIC_ATT_STUB)
23125 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
23126 else if (MACHOPIC_PURE)
23127 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
23128 else
23129 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
23130
23131 fprintf (file, "%s:\n", stub);
23132 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23133
23134 if (MACHOPIC_ATT_STUB)
23135 {
23136 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
23137 }
23138 else if (MACHOPIC_PURE)
23139 {
23140 /* PIC stub. */
23141 /* 25-byte PIC stub using "CALL get_pc_thunk". */
23142 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
23143 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
23144 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
23145 label, lazy_ptr_name, label);
23146 fprintf (file, "\tjmp\t*%%ecx\n");
23147 }
23148 else
23149 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
23150
23151 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
23152 it needs no stub-binding-helper. */
23153 if (MACHOPIC_ATT_STUB)
23154 return;
23155
23156 fprintf (file, "%s:\n", binder_name);
23157
23158 if (MACHOPIC_PURE)
23159 {
23160 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
23161 fprintf (file, "\tpushl\t%%ecx\n");
23162 }
23163 else
23164 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
23165
23166 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
23167
23168 /* N.B. Keep the correspondence of these
23169 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
23170 old-pic/new-pic/non-pic stubs; altering this will break
23171 compatibility with existing dylibs. */
23172 if (MACHOPIC_PURE)
23173 {
23174 /* 25-byte PIC stub using "CALL get_pc_thunk". */
23175 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
23176 }
23177 else
23178 /* 16-byte -mdynamic-no-pic stub. */
23179 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
23180
23181 fprintf (file, "%s:\n", lazy_ptr_name);
23182 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23183 fprintf (file, ASM_LONG "%s\n", binder_name);
23184}
23185#endif /* TARGET_MACHO */
23186
23187/* Order the registers for register allocator. */
23188
23189void
23190x86_order_regs_for_local_alloc (void)
23191{
23192 int pos = 0;
23193 int i;
23194
23195 /* First allocate the local general purpose registers. */
23196 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23197 if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
23198 reg_alloc_order [pos++] = i;
23199
23200 /* Global general purpose registers. */
23201 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23202 if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
23203 reg_alloc_order [pos++] = i;
23204
23205 /* x87 registers come first in case we are doing FP math
23206 using them. */
23207 if (!TARGET_SSE_MATH)
23208 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23209 reg_alloc_order [pos++] = i;
23210
23211 /* SSE registers. */
23212 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
23213 reg_alloc_order [pos++] = i;
23214 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
23215 reg_alloc_order [pos++] = i;
23216
23217 /* Extended REX SSE registers. */
23218 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
23219 reg_alloc_order [pos++] = i;
23220
23221 /* Mask register. */
23222 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
23223 reg_alloc_order [pos++] = i;
23224
23225 /* x87 registers. */
23226 if (TARGET_SSE_MATH)
23227 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23228 reg_alloc_order [pos++] = i;
23229
23230 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
23231 reg_alloc_order [pos++] = i;
23232
23233 /* Initialize the rest of the array, as we do not allocate some registers
23234 at all. */
23235 while (pos < FIRST_PSEUDO_REGISTER)
23236 reg_alloc_order [pos++] = 0;
23237}
23238
23239static bool
23240ix86_ms_bitfield_layout_p (const_tree record_type)
23241{
23242 return ((TARGET_MS_BITFIELD_LAYOUT
23243 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
23244 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
23245}
23246
23247/* Returns an expression indicating where the this parameter is
23248 located on entry to the FUNCTION. */
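/* Roughly: on 64-bit targets this is the first (or, when the return value is
   an aggregate returned in memory, the second) integer argument register of
   the selected ABI; with the 32-bit register-parameter conventions it is
   %ecx, %edx or %eax depending on fastcall/thiscall and aggregate returns;
   otherwise it lives on the stack just above the return address.  */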
23249
23250static rtx
23251x86_this_parameter (tree function)
23252{
23253 tree type = TREE_TYPE (function);
23254 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
23255 int nregs;
23256
23257 if (TARGET_64BIT)
23258 {
23259 const int *parm_regs;
23260
23261 if (ix86_function_type_abi (type) == MS_ABI)
23262 parm_regs = x86_64_ms_abi_int_parameter_registers;
23263 else
23264 parm_regs = x86_64_int_parameter_registers;
23265 return gen_rtx_REG (Pmode, parm_regs[aggr]);
23266 }
23267
23268 nregs = ix86_function_regparm (type, function);
23269
23270 if (nregs > 0 && !stdarg_p (type))
23271 {
23272 int regno;
23273 unsigned int ccvt = ix86_get_callcvt (type);
23274
23275 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
23276 regno = aggr ? DX_REG : CX_REG;
23277 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
23278 {
23279 regno = CX_REG;
23280 if (aggr)
23281 return gen_rtx_MEM (SImode,
23282 plus_constant (Pmode, stack_pointer_rtx, 4));
23283 }
23284 else
23285 {
23286 regno = AX_REG;
23287 if (aggr)
23288 {
23289 regno = DX_REG;
23290 if (nregs == 1)
23291 return gen_rtx_MEM (SImode,
23292 plus_constant (Pmode,
23293 stack_pointer_rtx, 4));
23294 }
23295 }
23296 return gen_rtx_REG (SImode, regno);
23297 }
23298
23299 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
23300 aggr ? 8 : 4));
23301}
23302
23303/* Determine whether x86_output_mi_thunk can succeed. */
23304
23305static bool
23306x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
23307 const_tree function)
23308{
23309 /* 64-bit can handle anything. */
23310 if (TARGET_64BIT)
23311 return true;
23312
23313 /* For 32-bit, everything's fine if we have one free register. */
23314 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
23315 return true;
23316
23317 /* Need a free register for vcall_offset. */
23318 if (vcall_offset)
23319 return false;
23320
23321 /* Need a free register for GOT references. */
23322 if (flag_pic && !targetm.binds_local_p (function))
23323 return false;
23324
23325 /* Otherwise ok. */
23326 return true;
23327}
23328
23329/* Output the assembler code for a thunk function. THUNK_DECL is the
23330 declaration for the thunk function itself, FUNCTION is the decl for
23331 the target function. DELTA is an immediate constant offset to be
23332 added to THIS. If VCALL_OFFSET is nonzero, the word at
23333 *(*this + vcall_offset) should be added to THIS. */
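/* For example, with a small DELTA, no VCALL_OFFSET and a locally bound
   FUNCTION on a 64-bit target, the code below typically reduces to adding
   DELTA to the incoming this pointer and then emitting a direct sibling
   call to FUNCTION.  */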
23334
23335static void
23336x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
23337 HOST_WIDE_INT vcall_offset, tree function)
23338{
23339 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
23340 rtx this_param = x86_this_parameter (function);
23341 rtx this_reg, tmp, fnaddr;
23342 unsigned int tmp_regno;
23343 rtx_insn *insn;
23344 int saved_flag_force_indirect_call = flag_force_indirect_call;
23345
23346 if (TARGET_64BIT)
23347 tmp_regno = R10_REG;
23348 else
23349 {
23350 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
23351 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
23352 tmp_regno = AX_REG;
23353 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
23354 tmp_regno = DX_REG;
23355 else
23356 tmp_regno = CX_REG;
23357
23358 if (flag_pic)
23359 flag_force_indirect_call = 0;
23360 }
23361
23362 emit_note (NOTE_INSN_PROLOGUE_END);
23363
23364 /* If CET is enabled, insert an ENDBR instruction. */
23365 if ((flag_cf_protection & CF_BRANCH))
23366 emit_insn (gen_nop_endbr ());
23367
23368 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23369 pull it in now and let DELTA benefit. */
23370 if (REG_P (this_param))
23371 this_reg = this_param;
23372 else if (vcall_offset)
23373 {
23374 /* Put the this parameter into %eax. */
23375 this_reg = gen_rtx_REG (Pmode, AX_REG);
23376 emit_move_insn (this_reg, this_param);
23377 }
23378 else
23379 this_reg = NULL_RTX;
23380
23381 /* Adjust the this parameter by a fixed constant. */
23382 if (delta)
23383 {
23384 rtx delta_rtx = GEN_INT (delta);
23385 rtx delta_dst = this_reg ? this_reg : this_param;
23386
23387 if (TARGET_64BIT)
23388 {
23389 if (!x86_64_general_operand (delta_rtx, Pmode))
23390 {
23391 tmp = gen_rtx_REG (Pmode, tmp_regno);
23392 emit_move_insn (tmp, delta_rtx);
23393 delta_rtx = tmp;
23394 }
23395 }
23396
23397 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
23398 }
23399
23400 /* Adjust the this parameter by a value stored in the vtable. */
23401 if (vcall_offset)
23402 {
23403 rtx vcall_addr, vcall_mem, this_mem;
23404
23405 tmp = gen_rtx_REG (Pmode, tmp_regno);
23406
23407 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
23408 if (Pmode != ptr_mode)
23409 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
23410 emit_move_insn (tmp, this_mem);
23411
23412 /* Adjust the this parameter. */
23413 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
23414 if (TARGET_64BIT
23415 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
23416 {
23417 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
23418 emit_move_insn (tmp2, GEN_INT (vcall_offset));
23419 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
23420 }
23421
23422 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
23423 if (Pmode != ptr_mode)
23424 emit_insn (gen_addsi_1_zext (this_reg,
23425 gen_rtx_REG (ptr_mode,
23426 REGNO (this_reg)),
23427 vcall_mem));
23428 else
23429 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
23430 }
23431
23432 /* If necessary, drop THIS back to its stack slot. */
23433 if (this_reg && this_reg != this_param)
23434 emit_move_insn (this_param, this_reg);
23435
23436 fnaddr = XEXP (DECL_RTL (function), 0);
23437 if (TARGET_64BIT)
23438 {
23439 if (!flag_pic || targetm.binds_local_p (function)
23440 || TARGET_PECOFF)
23441 ;
23442 else
23443 {
23444 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
23445 tmp = gen_rtx_CONST (Pmode, tmp);
23446 fnaddr = gen_const_mem (Pmode, tmp);
23447 }
23448 }
23449 else
23450 {
23451 if (!flag_pic || targetm.binds_local_p (function))
23452 ;
23453#if TARGET_MACHO
23454 else if (TARGET_MACHO)
23455 {
23456 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
23457 fnaddr = XEXP (fnaddr, 0);
23458 }
23459#endif /* TARGET_MACHO */
23460 else
23461 {
23462 tmp = gen_rtx_REG (Pmode, CX_REG);
23463 output_set_got (tmp, NULL_RTX);
23464
23465 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
23466 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
23467 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
23468 fnaddr = gen_const_mem (Pmode, fnaddr);
23469 }
23470 }
23471
23472 /* Our sibling call patterns do not allow memories, because we have no
23473 predicate that can distinguish between frame and non-frame memory.
23474 For our purposes here, we can get away with (ab)using a jump pattern,
23475 because we're going to do no optimization. */
23476 if (MEM_P (fnaddr))
23477 {
23478 if (sibcall_insn_operand (fnaddr, word_mode))
23479 {
23480 fnaddr = XEXP (DECL_RTL (function), 0);
23481 tmp = gen_rtx_MEM (QImode, fnaddr);
23482 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
23483 tmp = emit_call_insn (tmp);
23484 SIBLING_CALL_P (tmp) = 1;
23485 }
23486 else
23487 emit_jump_insn (gen_indirect_jump (fnaddr));
23488 }
23489 else
23490 {
23491 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
23492 {
23493 // CM_LARGE_PIC always uses pseudo PIC register which is
23494 // uninitialized. Since FUNCTION is local and calling it
23495 // doesn't go through PLT, we use scratch register %r11 as
23496 // PIC register and initialize it here.
23497 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
23498 ix86_init_large_pic_reg (tmp_regno);
23499 fnaddr = legitimize_pic_address (fnaddr,
23500 gen_rtx_REG (Pmode, tmp_regno));
23501 }
23502
23503 if (!sibcall_insn_operand (fnaddr, word_mode))
23504 {
23505 tmp = gen_rtx_REG (word_mode, tmp_regno);
23506 if (GET_MODE (fnaddr) != word_mode)
23507 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
23508 emit_move_insn (tmp, fnaddr);
23509 fnaddr = tmp;
23510 }
23511
23512 tmp = gen_rtx_MEM (QImode, fnaddr);
23513 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
23514 tmp = emit_call_insn (tmp);
23515 SIBLING_CALL_P (tmp) = 1;
23516 }
23517 emit_barrier ();
23518
23519 /* Emit just enough of rest_of_compilation to get the insns emitted. */
23520 insn = get_insns ();
23521 shorten_branches (insn);
23522 assemble_start_function (thunk_fndecl, fnname);
23523 final_start_function (insn, file, 1);
23524 final (insn, file, 1);
23525 final_end_function ();
23526 assemble_end_function (thunk_fndecl, fnname);
23527
23528 flag_force_indirect_call = saved_flag_force_indirect_call;
23529}
23530
23531static void
23532x86_file_start (void)
23533{
23534 default_file_start ();
23535 if (TARGET_16BIT)
23536 fputs ("\t.code16gcc\n", asm_out_file);
23537#if TARGET_MACHO
23538 darwin_file_start ();
23539#endif
23540 if (X86_FILE_START_VERSION_DIRECTIVE)
23541 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
23542 if (X86_FILE_START_FLTUSED)
23543 fputs ("\t.global\t__fltused\n", asm_out_file);
23544 if (ix86_asm_dialect == ASM_INTEL)
23545 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
23546}
23547
23548int
23549x86_field_alignment (tree type, int computed)
23550{
23551 machine_mode mode;
23552
23553 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
23554 return computed;
23555 if (TARGET_IAMCU)
23556 return iamcu_alignment (type, computed);
23557 type = strip_array_types (type);
23558 mode = TYPE_MODE (type);
23559 if (mode == DFmode || mode == DCmode
23560 || GET_MODE_CLASS (mode) == MODE_INT
23561 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
23562 {
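      /* Without -malign-double the traditional ia32 ABI caps the in-struct
	 alignment of double, long long and similar fields at 4 bytes, hence
	 the MIN (32, computed) below; _Atomic fields keep their computed
	 alignment and only trigger the -Wpsabi note about the GCC 11
	 change.  */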
23563 if (TYPE_ATOMIC (type) && computed > 32)
23564 {
23565 static bool warned;
23566
23567 if (!warned && warn_psabi)
23568 {
23569 const char *url
23570 = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
23571
23572 warned = true;
23573 inform (input_location, "the alignment of %<_Atomic %T%> "
23574 "fields changed in %{GCC 11.1%}",
23575 TYPE_MAIN_VARIANT (type), url);
23576 }
23577 }
23578 else
23579 return MIN (32, computed);
23580 }
23581 return computed;
23582}
23583
23584/* Print call to TARGET to FILE. */
23585
23586static void
23587x86_print_call_or_nop (FILE *file, const char *target)
23588{
23589 if (flag_nop_mcount || !strcmp (target, "nop"))
23590 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
23591 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
23592 else if (!TARGET_PECOFF && flag_pic)
23593 {
23594 gcc_assert (flag_plt);
23595
23596 fprintf (file, "1:\tcall\t%s@PLT\n", target);
23597 }
23598 else
23599 fprintf (file, "1:\tcall\t%s\n", target);
23600}
23601
23602static bool
23603current_fentry_name (const char **name)
23604{
23605 tree attr = lookup_attribute ("fentry_name",
23606 DECL_ATTRIBUTES (current_function_decl));
23607 if (!attr)
23608 return false;
23609 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
23610 return true;
23611}
23612
23613static bool
23614current_fentry_section (const char **name)
23615{
23616 tree attr = lookup_attribute ("fentry_section",
23617 DECL_ATTRIBUTES (current_function_decl));
23618 if (!attr)
23619 return false;
23620 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
23621 return true;
23622}
23623
23624 /* Return a caller-saved register which isn't live, or a callee-saved
23625 register which has been saved on the stack in the prologue at entry,
23626 for use by the profiler. */
23627
23628static int
23629x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
23630{
23631 /* Use %r10 if the profiler is emitted before the prologue or it isn't
23632 used by DRAP. */
23633 if (ix86_profile_before_prologue ()
23634 || !crtl->drap_reg
23635 || REGNO (crtl->drap_reg) != R10_REG)
23636 return R10_REG;
23637
23638 /* The profiler is emitted after the prologue. If there is a
23639 caller-saved register which isn't live or a callee-saved
23640 register saved on stack in the prologue, use it. */
23641
23642 bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
23643
23644 int i;
23645 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23646 if (GENERAL_REGNO_P (i)
23647 && i != R10_REG
23648#ifdef NO_PROFILE_COUNTERS
23649 && (r11_ok || i != R11_REG)
23650#else
23651 && i != R11_REG
23652#endif
23653 && TEST_HARD_REG_BIT (accessible_reg_set, i)
23654 && (ix86_save_reg (i, true, true)
23655 || (call_used_regs[i]
23656 && !fixed_regs[i]
23657 && !REGNO_REG_SET_P (reg_live, i))))
23658 return i;
23659
23660 sorry ("no register available for profiling %<-mcmodel=large%s%>",
23661 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
23662
23663 return R10_REG;
23664}
23665
23666/* Output assembler code to FILE to increment profiler label # LABELNO
23667 for profiling a function entry. */
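/* In the common case (64-bit, small code model, no PIC) this simply emits a
   direct "1:	call" to the mcount function via x86_print_call_or_nop; PIC
   and the large code models instead call through the GOT/PLT or materialize
   the address in a scratch register first, as the cases below show.  */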
23668void
23669x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
23670{
23671 if (cfun->machine->insn_queued_at_entrance)
23672 {
23673 if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
23674 fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
23675 unsigned int patch_area_size
23676 = crtl->patch_area_size - crtl->patch_area_entry;
23677 if (patch_area_size)
23678 ix86_output_patchable_area (patch_area_size,
23679 crtl->patch_area_entry == 0);
23680 }
23681
23682 const char *mcount_name = MCOUNT_NAME;
23683
23684 if (current_fentry_name (&mcount_name))
23685 ;
23686 else if (fentry_name)
23687 mcount_name = fentry_name;
23688 else if (flag_fentry)
23689 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
23690
23691 if (TARGET_64BIT)
23692 {
23693#ifndef NO_PROFILE_COUNTERS
23694 if (ASSEMBLER_DIALECT == ASM_INTEL)
23695 fprintf (file, "\tlea\tr11, %sP%d[rip]\n", LPREFIX, labelno);
23696 else
23697 fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
23698#endif
23699
23700 int scratch;
23701 const char *reg;
23702 char legacy_reg[4] = { 0 };
23703
23704 if (!TARGET_PECOFF)
23705 {
23706 switch (ix86_cmodel)
23707 {
23708 case CM_LARGE:
23709 scratch = x86_64_select_profile_regnum (true);
23710 reg = hi_reg_name[scratch];
23711 if (LEGACY_INT_REGNO_P (scratch))
23712 {
23713 legacy_reg[0] = 'r';
23714 legacy_reg[1] = reg[0];
23715 legacy_reg[2] = reg[1];
23716 reg = legacy_reg;
23717 }
23718 if (ASSEMBLER_DIALECT == ASM_INTEL)
23719 fprintf (file, "1:\tmovabs\t%s, OFFSET FLAT:%s\n"
23720 "\tcall\t%s\n", reg, mcount_name, reg);
23721 else
23722 fprintf (file, "1:\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
23723 mcount_name, reg, reg);
23724 break;
23725 case CM_LARGE_PIC:
23726#ifdef NO_PROFILE_COUNTERS
23727 scratch = x86_64_select_profile_regnum (false);
23728 reg = hi_reg_name[scratch];
23729 if (LEGACY_INT_REGNO_P (scratch))
23730 {
23731 legacy_reg[0] = 'r';
23732 legacy_reg[1] = reg[0];
23733 legacy_reg[2] = reg[1];
23734 reg = legacy_reg;
23735 }
23736 if (ASSEMBLER_DIALECT == ASM_INTEL)
23737 {
23738 fprintf (file, "1:movabs\tr11, "
23739 "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
23740 fprintf (file, "\tlea\t%s, 1b[rip]\n", reg);
23741 fprintf (file, "\tadd\t%s, r11\n", reg);
23742 fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
23743 mcount_name);
23744 fprintf (file, "\tadd\t%s, r11\n", reg);
23745 fprintf (file, "\tcall\t%s\n", reg);
23746 break;
23747 }
23748 fprintf (file,
23749 "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
23750 fprintf (file, "\tleaq\t1b(%%rip), %%%s\n", reg);
23751 fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
23752 fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
23753 fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
23754 fprintf (file, "\tcall\t*%%%s\n", reg);
23755#else
23756 sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
23757#endif
23758 break;
23759 case CM_SMALL_PIC:
23760 case CM_MEDIUM_PIC:
23761 if (!flag_plt)
23762 {
23763 if (ASSEMBLER_DIALECT == ASM_INTEL)
23764 fprintf (file, "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
23765 mcount_name);
23766 else
23767 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n",
23768 mcount_name);
23769 break;
23770 }
23771 /* fall through */
23772 default:
23773 x86_print_call_or_nop (file, mcount_name);
23774 break;
23775 }
23776 }
23777 else
23778 x86_print_call_or_nop (file, mcount_name);
23779 }
23780 else if (flag_pic)
23781 {
23782#ifndef NO_PROFILE_COUNTERS
23783 if (ASSEMBLER_DIALECT == ASM_INTEL)
23784 fprintf (file,
23785 "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n",
23786 LPREFIX, labelno);
23787 else
23788 fprintf (file,
23789 "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
23790 LPREFIX, labelno);
23791#endif
23792 if (flag_plt)
23793 x86_print_call_or_nop (file, mcount_name);
23794 else if (ASSEMBLER_DIALECT == ASM_INTEL)
23795 fprintf (file, "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n", mcount_name);
23796 else
23797 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
23798 }
23799 else
23800 {
23801#ifndef NO_PROFILE_COUNTERS
23802 if (ASSEMBLER_DIALECT == ASM_INTEL)
23803 fprintf (file,
23804 "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n",
23805 LPREFIX, labelno);
23806 else
23807 fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
23808 LPREFIX, labelno);
23809#endif
23810 x86_print_call_or_nop (file, mcount_name);
23811 }
23812
23813 if (flag_record_mcount
23814 || lookup_attribute ("fentry_section",
23815 DECL_ATTRIBUTES (current_function_decl)))
23816 {
23817 const char *sname = "__mcount_loc";
23818
23819 if (current_fentry_section (&sname))
23820 ;
23821 else if (fentry_section)
23822 sname = fentry_section;
23823
23824 fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
23825 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
23826 fprintf (file, "\t.previous\n");
23827 }
23828}
23829
23830/* We don't have exact information about the insn sizes, but we may assume
23831 quite safely that we are informed about all 1 byte insns and memory
23832 address sizes. This is enough to eliminate unnecessary padding in
23833 99% of cases. */
23834
23835int
23836ix86_min_insn_size (rtx_insn *insn)
23837{
23838 int l = 0, len;
23839
23840 if (!INSN_P (insn) || !active_insn_p (insn))
23841 return 0;
23842
23843 /* Discard alignments we've emitted, and jump instructions. */
23844 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
23845 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
23846 return 0;
23847
23848 /* Important case - calls are always 5 bytes.
23849 It is common to have many calls in a row. */
23850 if (CALL_P (insn)
23851 && symbolic_reference_mentioned_p (PATTERN (insn))
23852 && !SIBLING_CALL_P (insn))
23853 return 5;
23854 len = get_attr_length (insn);
23855 if (len <= 1)
23856 return 1;
23857
23858 /* For normal instructions we rely on get_attr_length being exact,
23859 with a few exceptions. */
23860 if (!JUMP_P (insn))
23861 {
23862 enum attr_type type = get_attr_type (insn);
23863
23864 switch (type)
23865 {
23866 case TYPE_MULTI:
23867 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
23868 || asm_noperands (PATTERN (insn)) >= 0)
23869 return 0;
23870 break;
23871 case TYPE_OTHER:
23872 case TYPE_FCMP:
23873 break;
23874 default:
23875 /* Otherwise trust get_attr_length. */
23876 return len;
23877 }
23878
23879 l = get_attr_length_address (insn);
23880 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23881 l = 4;
23882 }
23883 if (l)
23884 return 1+l;
23885 else
23886 return 2;
23887}
23888
23889#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
23890
23891 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
23892 window. */
23893
23894static void
23895ix86_avoid_jump_mispredicts (void)
23896{
23897 rtx_insn *insn, *start = get_insns ();
23898 int nbytes = 0, njumps = 0;
23899 bool isjump = false;
23900
23901  /* Look for all minimal intervals of instructions containing 4 jumps.
23902     The intervals are bounded by START and INSN.  NBYTES is the total
23903     size of the instructions in the interval, including INSN but not
23904     including START.  When NBYTES is smaller than 16, it is possible that
23905     START and INSN end up in the same 16 byte window.
23906
23907     The smallest offset within that window at which INSN can start occurs
23908     when START ends at offset 0; INSN then starts at offset
23909     NBYTES - sizeof (INSN).  So we emit a p2align for a 16 byte window
23910     with maxskip 15 - NBYTES + sizeof (INSN).
23911
23912     Don't consider an asm goto as a jump: it need not contain one (control
23913     can reach its labels by other means), and we estimate the minimum length of all asm stmts as 0 anyway.  */
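  /* Worked example (illustrative numbers, not taken from a real insn
     stream): if the interval ending at INSN spans NBYTES == 12 bytes and
     INSN itself is 2 bytes long, we emit a p2align with maxskip
     15 - 12 + 2 = 5 in front of INSN, so INSN cannot land in the same
     16 byte window as the three preceding jumps.  */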
23914 for (insn = start; insn; insn = NEXT_INSN (insn))
23915 {
23916 int min_size;
23917
23918 if (LABEL_P (insn))
23919 {
23920 align_flags alignment = label_to_alignment (insn);
23921 int align = alignment.levels[0].log;
23922 int max_skip = alignment.levels[0].maxskip;
23923
23924 if (max_skip > 15)
23925 max_skip = 15;
23926 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
23927 already in the current 16 byte page, because otherwise
23928 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
23929 bytes to reach 16 byte boundary. */
23930 if (align <= 0
23931 || (align <= 3 && max_skip != (1 << align) - 1))
23932 max_skip = 0;
23933 if (dump_file)
23934	    fprintf (dump_file, "Label %i with max_skip %i\n",
23935 INSN_UID (insn), max_skip);
23936 if (max_skip)
23937 {
23938 while (nbytes + max_skip >= 16)
23939 {
23940		  start = NEXT_INSN (start);
23941		  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
23942		      || CALL_P (start))
23943		    njumps--, isjump = true;
23944		  else
23945		    isjump = false;
23946		  nbytes -= ix86_min_insn_size (start);
23947 }
23948 }
23949 continue;
23950 }
23951
23952 min_size = ix86_min_insn_size (insn);
23953 nbytes += min_size;
23954	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
23955 fprintf (stream: dump_file, format: "Insn %i estimated to %i bytes\n",
23956 INSN_UID (insn), min_size);
23957 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
23958 || CALL_P (insn))
23959 njumps++;
23960 else
23961 continue;
23962
23963 while (njumps > 3)
23964 {
23965	  start = NEXT_INSN (start);
23966	  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
23967	      || CALL_P (start))
23968	    njumps--, isjump = true;
23969	  else
23970	    isjump = false;
23971	  nbytes -= ix86_min_insn_size (start);
23972	}
23973      gcc_assert (njumps >= 0);
23974      if (dump_file)
23975	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
23976		 INSN_UID (start), INSN_UID (insn), nbytes);
23977
23978 if (njumps == 3 && isjump && nbytes < 16)
23979 {
23980 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
23981
23982 if (dump_file)
23983	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23984 INSN_UID (insn), padsize);
23985 emit_insn_before (gen_max_skip_align (GEN_INT (4), GEN_INT (padsize)), insn);
23986 }
23987 }
23988}
23989#endif
23990
23991/* AMD Athlon works faster
23992 when RET is not destination of conditional jump or directly preceded
23993 by other jump instruction. We avoid the penalty by inserting NOP just
23994 before the RET instructions in such cases. */
23995static void
23996ix86_pad_returns (void)
23997{
23998 edge e;
23999 edge_iterator ei;
24000
24001 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
24002 {
24003 basic_block bb = e->src;
24004 rtx_insn *ret = BB_END (bb);
24005 rtx_insn *prev;
24006 bool replace = false;
24007
24008 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
24009 || optimize_bb_for_size_p (bb))
24010 continue;
24011      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
24012 if (active_insn_p (prev) || LABEL_P (prev))
24013 break;
24014 if (prev && LABEL_P (prev))
24015 {
24016 edge e;
24017 edge_iterator ei;
24018
24019 FOR_EACH_EDGE (e, ei, bb->preds)
24020 if (EDGE_FREQUENCY (e) && e->src->index >= 0
24021 && !(e->flags & EDGE_FALLTHRU))
24022 {
24023 replace = true;
24024 break;
24025 }
24026 }
24027 if (!replace)
24028 {
24029 prev = prev_active_insn (ret);
24030 if (prev
24031 && ((JUMP_P (prev) && any_condjump_p (prev))
24032 || CALL_P (prev)))
24033 replace = true;
24034 /* Empty functions get branch mispredict even when
24035 the jump destination is not visible to us. */
24036 if (!prev && !optimize_function_for_size_p (cfun))
24037 replace = true;
24038 }
24039 if (replace)
24040 {
24041 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
24042 delete_insn (ret);
24043 }
24044 }
24045}
24046
24047/* Count the minimum number of instructions in BB. Return 4 if the
24048 number of instructions >= 4. */
24049
24050static int
24051ix86_count_insn_bb (basic_block bb)
24052{
24053 rtx_insn *insn;
24054 int insn_count = 0;
24055
24056 /* Count number of instructions in this block. Return 4 if the number
24057 of instructions >= 4. */
24058 FOR_BB_INSNS (bb, insn)
24059 {
24060      /* This only happens in exit blocks.  */
24061 if (JUMP_P (insn)
24062 && ANY_RETURN_P (PATTERN (insn)))
24063 break;
24064
24065 if (NONDEBUG_INSN_P (insn)
24066 && GET_CODE (PATTERN (insn)) != USE
24067 && GET_CODE (PATTERN (insn)) != CLOBBER)
24068 {
24069 insn_count++;
24070 if (insn_count >= 4)
24071 return insn_count;
24072 }
24073 }
24074
24075 return insn_count;
24076}
24077
24078
24079/* Count the minimum number of instructions in code path in BB.
24080 Return 4 if the number of instructions >= 4. */
24081
24082static int
24083ix86_count_insn (basic_block bb)
24084{
24085 edge e;
24086 edge_iterator ei;
24087 int min_prev_count;
24088
24089 /* Only bother counting instructions along paths with no
24090 more than 2 basic blocks between entry and exit. Given
24091 that BB has an edge to exit, determine if a predecessor
24092 of BB has an edge from entry. If so, compute the number
24093 of instructions in the predecessor block. If there
24094 happen to be multiple such blocks, compute the minimum. */
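  /* For illustration (not from the original comment): for a path
     ENTRY -> BB -> EXIT the count is just ix86_count_insn_bb (BB); for
     ENTRY -> PRED -> BB -> EXIT it is ix86_count_insn_bb (PRED)
     + ix86_count_insn_bb (BB), with each block's count saturating at 4.  */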
24095 min_prev_count = 4;
24096 FOR_EACH_EDGE (e, ei, bb->preds)
24097 {
24098 edge prev_e;
24099 edge_iterator prev_ei;
24100
24101 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
24102 {
24103 min_prev_count = 0;
24104 break;
24105 }
24106 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
24107 {
24108 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
24109 {
24110	      int count = ix86_count_insn_bb (e->src);
24111 if (count < min_prev_count)
24112 min_prev_count = count;
24113 break;
24114 }
24115 }
24116 }
24117
24118 if (min_prev_count < 4)
24119 min_prev_count += ix86_count_insn_bb (bb);
24120
24121 return min_prev_count;
24122}
24123
24124/* Pad short function to 4 instructions. */
24125
24126static void
24127ix86_pad_short_function (void)
24128{
24129 edge e;
24130 edge_iterator ei;
24131
24132 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
24133 {
24134 rtx_insn *ret = BB_END (e->src);
24135 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
24136 {
24137	  int insn_count = ix86_count_insn (e->src);
24138
24139 /* Pad short function. */
24140 if (insn_count < 4)
24141 {
24142 rtx_insn *insn = ret;
24143
24144 /* Find epilogue. */
24145 while (insn
24146 && (!NOTE_P (insn)
24147 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
24148 insn = PREV_INSN (insn);
24149
24150 if (!insn)
24151 insn = ret;
24152
24153 /* Two NOPs count as one instruction. */
24154 insn_count = 2 * (4 - insn_count);
24155 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
24156 }
24157 }
24158 }
24159}
24160
24161/* Fix up a Windows system unwinder issue. If an EH region falls through into
24162 the epilogue, the Windows system unwinder will apply epilogue logic and
24163 produce incorrect offsets. This can be avoided by adding a nop between
24164 the last insn that can throw and the first insn of the epilogue. */
24165
24166static void
24167ix86_seh_fixup_eh_fallthru (void)
24168{
24169 edge e;
24170 edge_iterator ei;
24171
24172 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
24173 {
24174 rtx_insn *insn, *next;
24175
24176 /* Find the beginning of the epilogue. */
24177 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
24178 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
24179 break;
24180 if (insn == NULL)
24181 continue;
24182
24183 /* We only care about preceding insns that can throw. */
24184 insn = prev_active_insn (insn);
24185 if (insn == NULL || !can_throw_internal (insn))
24186 continue;
24187
24188 /* Do not separate calls from their debug information. */
24189      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
24190 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
24191 insn = next;
24192 else
24193 break;
24194
24195 emit_insn_after (gen_nops (const1_rtx), insn);
24196 }
24197}
24198/* Split vector load from parm_decl to elemental loads to avoid STLF
24199 stalls. */
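/* For illustration (an assumed code sequence, not taken from this file):
   a V2DF load from an argument slot such as

       movapd  16(%rsp), %xmm0

   is rewritten into two scalar loads

       movsd   16(%rsp), %xmm0
       movhpd  24(%rsp), %xmm0

   so that each half can forward from the individual 8-byte stores that
   spilled the argument.  */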
24200static void
24201ix86_split_stlf_stall_load ()
24202{
24203 rtx_insn* insn, *start = get_insns ();
24204 unsigned window = 0;
24205
24206 for (insn = start; insn; insn = NEXT_INSN (insn))
24207 {
24208 if (!NONDEBUG_INSN_P (insn))
24209 continue;
24210 window++;
24211      /* When 64 independent vaddps insns (e.g. vaddps %xmm18, %xmm19, %xmm20,
24212	 inserted only to fill the pipeline) precede the stalled load, the STLF
24213	 stall case is as fast as the no-stall case on CLX.
24214	 Since the CFG is freed before machine_reorg, just do a rough
24215	 calculation of the window according to the insn layout.  */
24216 if (window > (unsigned) x86_stlf_window_ninsns)
24217 return;
24218
24219 if (any_uncondjump_p (insn)
24220 || ANY_RETURN_P (PATTERN (insn))
24221 || CALL_P (insn))
24222 return;
24223
24224 rtx set = single_set (insn);
24225 if (!set)
24226 continue;
24227 rtx src = SET_SRC (set);
24228 if (!MEM_P (src)
24229 /* Only handle V2DFmode load since it doesn't need any scratch
24230 register. */
24231 || GET_MODE (src) != E_V2DFmode
24232 || !MEM_EXPR (src)
24233 || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
24234 continue;
24235
24236 rtx zero = CONST0_RTX (V2DFmode);
24237 rtx dest = SET_DEST (set);
24238 rtx m = adjust_address (src, DFmode, 0);
24239 rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
24240 emit_insn_before (loadlpd, insn);
24241 m = adjust_address (src, DFmode, 8);
24242 rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
24243 if (dump_file && (dump_flags & TDF_DETAILS))
24244 {
24245	  fputs ("Due to potential STLF stall, split instruction:\n",
24246		 dump_file);
24247	  print_rtl_single (dump_file, insn);
24248	  fputs ("To:\n", dump_file);
24249 print_rtl_single (dump_file, loadlpd);
24250 print_rtl_single (dump_file, loadhpd);
24251 }
24252 PATTERN (insn) = loadhpd;
24253 INSN_CODE (insn) = -1;
24254 gcc_assert (recog_memoized (insn) != -1);
24255 }
24256}
24257
24258/* Implement machine specific optimizations. We implement padding of returns
24259 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
24260static void
24261ix86_reorg (void)
24262{
24263 /* We are freeing block_for_insn in the toplev to keep compatibility
24264 with old MDEP_REORGS that are not CFG based. Recompute it now. */
24265 compute_bb_for_insn ();
24266
24267 if (TARGET_SEH && current_function_has_exception_handlers ())
24268 ix86_seh_fixup_eh_fallthru ();
24269
24270 if (optimize && optimize_function_for_speed_p (cfun))
24271 {
24272 if (TARGET_SSE2)
24273 ix86_split_stlf_stall_load ();
24274 if (TARGET_PAD_SHORT_FUNCTION)
24275 ix86_pad_short_function ();
24276 else if (TARGET_PAD_RETURNS)
24277 ix86_pad_returns ();
24278#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
24279 if (TARGET_FOUR_JUMP_LIMIT)
24280 ix86_avoid_jump_mispredicts ();
24281#endif
24282 }
24283}
24284
24285/* Return nonzero when QImode register that must be represented via REX prefix
24286 is used. */
24287bool
24288x86_extended_QIreg_mentioned_p (rtx_insn *insn)
24289{
24290 int i;
24291 extract_insn_cached (insn);
24292 for (i = 0; i < recog_data.n_operands; i++)
24293 if (GENERAL_REG_P (recog_data.operand[i])
24294 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
24295 return true;
24296 return false;
24297}
24298
24299/* Return true when INSN mentions register that must be encoded using REX
24300 prefix. */
24301bool
24302x86_extended_reg_mentioned_p (rtx insn)
24303{
24304 subrtx_iterator::array_type array;
24305 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
24306 {
24307 const_rtx x = *iter;
24308 if (REG_P (x)
24309 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))
24310 || REX2_INT_REGNO_P (REGNO (x))))
24311 return true;
24312 }
24313 return false;
24314}
24315
24316/* Return true when INSN mentions register that must be encoded using REX2
24317 prefix. */
24318bool
24319x86_extended_rex2reg_mentioned_p (rtx insn)
24320{
24321 subrtx_iterator::array_type array;
24322 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
24323 {
24324 const_rtx x = *iter;
24325 if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x)))
24326 return true;
24327 }
24328 return false;
24329}
24330
24331/* Return true when the rtx operands mention a register that must be encoded
24332   using the EVEX prefix.  */
24333bool
24334x86_evex_reg_mentioned_p (rtx operands[], int nops)
24335{
24336 int i;
24337 for (i = 0; i < nops; i++)
24338 if (EXT_REX_SSE_REG_P (operands[i])
24339	|| x86_extended_rex2reg_mentioned_p (operands[i]))
24340 return true;
24341 return false;
24342}
24343
24344/* If profitable, negate (without causing overflow) integer constant
24345 of mode MODE at location LOC. Return true in this case. */
24346bool
24347x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
24348{
24349 HOST_WIDE_INT val;
24350
24351 if (!CONST_INT_P (*loc))
24352 return false;
24353
24354 switch (mode)
24355 {
24356 case E_DImode:
24357 /* DImode x86_64 constants must fit in 32 bits. */
24358 gcc_assert (x86_64_immediate_operand (*loc, mode));
24359
24360 mode = SImode;
24361 break;
24362
24363 case E_SImode:
24364 case E_HImode:
24365 case E_QImode:
24366 break;
24367
24368 default:
24369 gcc_unreachable ();
24370 }
24371
24372 /* Avoid overflows. */
24373 if (mode_signbit_p (mode, *loc))
24374 return false;
24375
24376 val = INTVAL (*loc);
24377
24378 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
24379 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
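  /* For example (illustrative): "addl $128, %eax" needs a 32-bit
     immediate, whereas the equivalent "subl $-128, %eax" fits in a
     sign-extended 8-bit immediate, so negating 128 saves 3 bytes; going
     the other way, from -128 to 128, would lose them.  */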
24380 if ((val < 0 && val != -128)
24381 || val == 128)
24382 {
24383 *loc = GEN_INT (-val);
24384 return true;
24385 }
24386
24387 return false;
24388}
24389
24390/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
24391 optabs would emit if we didn't have TFmode patterns. */
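/* Roughly (an illustrative C sketch, not taken from this file), for a
   64-bit unsigned IN converted to double:

     if ((long long) in >= 0)
       out = (double) in;
     else
       {
	 unsigned long long half = (in >> 1) | (in & 1);
	 out = (double) half + (double) half;
       }

   The low bit is ORed back into the halved value so that the final
   rounding of OUT is still correct.  */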
24392
24393void
24394x86_emit_floatuns (rtx operands[2])
24395{
24396 rtx_code_label *neglab, *donelab;
24397 rtx i0, i1, f0, in, out;
24398 machine_mode mode, inmode;
24399
24400 inmode = GET_MODE (operands[1]);
24401 gcc_assert (inmode == SImode || inmode == DImode);
24402
24403 out = operands[0];
24404 in = force_reg (inmode, operands[1]);
24405 mode = GET_MODE (out);
24406 neglab = gen_label_rtx ();
24407 donelab = gen_label_rtx ();
24408 f0 = gen_reg_rtx (mode);
24409
24410 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
24411
24412 expand_float (out, in, 0);
24413
24414 emit_jump_insn (gen_jump (donelab));
24415 emit_barrier ();
24416
24417 emit_label (neglab);
24418
24419 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
24420 1, OPTAB_DIRECT);
24421 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
24422 1, OPTAB_DIRECT);
24423 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
24424
24425 expand_float (f0, i0, 0);
24426
24427 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
24428
24429 emit_label (donelab);
24430}
24431
24432/* Return the diagnostic message string if conversion from FROMTYPE to
24433 TOTYPE is not allowed, NULL otherwise. */
24434
24435static const char *
24436ix86_invalid_conversion (const_tree fromtype, const_tree totype)
24437{
24438 machine_mode from_mode = element_mode (fromtype);
24439 machine_mode to_mode = element_mode (totype);
24440
24441 if (!TARGET_SSE2 && from_mode != to_mode)
24442 {
24443      /* Do not allow conversions to/from BFmode/HFmode scalar types
24444 when TARGET_SSE2 is not available. */
24445 if (from_mode == BFmode)
24446 return N_("invalid conversion from type %<__bf16%> "
24447 "without option %<-msse2%>");
24448 if (from_mode == HFmode)
24449 return N_("invalid conversion from type %<_Float16%> "
24450 "without option %<-msse2%>");
24451 if (to_mode == BFmode)
24452 return N_("invalid conversion to type %<__bf16%> "
24453 "without option %<-msse2%>");
24454 if (to_mode == HFmode)
24455 return N_("invalid conversion to type %<_Float16%> "
24456 "without option %<-msse2%>");
24457 }
24458
24459  /* Warn about silent implicit conversions between __bf16 and short,
24460     since __bfloat16 was redefined from a typedef of short to the
24461     real __bf16 type in GCC 13.  */
24462 if (element_mode (fromtype) != element_mode (totype)
24463 && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
24464 {
24465 /* Warn for silent implicit conversion where user may expect
24466 a bitcast. */
24467 if ((TYPE_MODE (fromtype) == BFmode
24468 && TYPE_MODE (totype) == HImode)
24469 || (TYPE_MODE (totype) == BFmode
24470 && TYPE_MODE (fromtype) == HImode))
24471 warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
24472 "to real %<__bf16%> since GCC 13.1, be careful of "
24473 "implicit conversion between %<__bf16%> and %<short%>; "
24474 "an explicit bitcast may be needed here");
24475 }
24476
24477 /* Conversion allowed. */
24478 return NULL;
24479}
24480
24481/* Return the diagnostic message string if the unary operation OP is
24482 not permitted on TYPE, NULL otherwise. */
24483
24484static const char *
24485ix86_invalid_unary_op (int op, const_tree type)
24486{
24487 machine_mode mmode = element_mode (type);
24488 /* Reject all single-operand operations on BFmode/HFmode except for &
24489 when TARGET_SSE2 is not available. */
24490 if (!TARGET_SSE2 && op != ADDR_EXPR)
24491 {
24492 if (mmode == BFmode)
24493 return N_("operation not permitted on type %<__bf16%> "
24494 "without option %<-msse2%>");
24495 if (mmode == HFmode)
24496 return N_("operation not permitted on type %<_Float16%> "
24497 "without option %<-msse2%>");
24498 }
24499
24500 /* Operation allowed. */
24501 return NULL;
24502}
24503
24504/* Return the diagnostic message string if the binary operation OP is
24505 not permitted on TYPE1 and TYPE2, NULL otherwise. */
24506
24507static const char *
24508ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
24509 const_tree type2)
24510{
24511 machine_mode type1_mode = element_mode (type1);
24512 machine_mode type2_mode = element_mode (type2);
24513 /* Reject all 2-operand operations on BFmode or HFmode
24514 when TARGET_SSE2 is not available. */
24515 if (!TARGET_SSE2)
24516 {
24517 if (type1_mode == BFmode || type2_mode == BFmode)
24518 return N_("operation not permitted on type %<__bf16%> "
24519 "without option %<-msse2%>");
24520
24521 if (type1_mode == HFmode || type2_mode == HFmode)
24522 return N_("operation not permitted on type %<_Float16%> "
24523 "without option %<-msse2%>");
24524 }
24525
24526 /* Operation allowed. */
24527 return NULL;
24528}
24529
24530
24531/* Target hook for scalar_mode_supported_p. */
24532static bool
24533ix86_scalar_mode_supported_p (scalar_mode mode)
24534{
24535 if (DECIMAL_FLOAT_MODE_P (mode))
24536 return default_decimal_float_supported_p ();
24537 else if (mode == TFmode)
24538 return true;
24539 else if (mode == HFmode || mode == BFmode)
24540 return true;
24541 else
24542 return default_scalar_mode_supported_p (mode);
24543}
24544
24545/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
24546 if MODE is HFmode, and punt to the generic implementation otherwise. */
24547
24548static bool
24549ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
24550{
24551 /* NB: Always return TRUE for HFmode so that the _Float16 type will
24552 be defined by the C front-end for AVX512FP16 intrinsics. We will
24553 issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
24554 enabled. */
24555 return ((mode == HFmode || mode == BFmode)
24556 ? true
24557 : default_libgcc_floating_mode_supported_p (mode));
24558}
24559
24560/* Implements target hook vector_mode_supported_p. */
24561static bool
24562ix86_vector_mode_supported_p (machine_mode mode)
24563{
24564 /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
24565 either. */
24566 if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
24567 return false;
24568 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24569 return true;
24570 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24571 return true;
24572 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
24573 return true;
24574 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
24575 return true;
24576 if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
24577 && VALID_MMX_REG_MODE (mode))
24578 return true;
24579 if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
24580 && VALID_MMX_REG_MODE_3DNOW (mode))
24581 return true;
24582 if (mode == V2QImode)
24583 return true;
24584 return false;
24585}
24586
24587/* Target hook for c_mode_for_suffix. */
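/* For example (illustrative): on x86 the literal 1.5q has mode TFmode
   (__float128) and 1.5w has mode XFmode (__float80).  */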
24588static machine_mode
24589ix86_c_mode_for_suffix (char suffix)
24590{
24591 if (suffix == 'q')
24592 return TFmode;
24593 if (suffix == 'w')
24594 return XFmode;
24595
24596 return VOIDmode;
24597}
24598
24599/* Helper function to map common constraints to non-EGPR ones.
24600   All related constraints have a j prefix: j plus an upper-case letter
24601   means the constraint is strictly EGPR enabled, while j plus a
24602   lower-case letter indicates the constraint is strictly gpr16 only.
24603
24604   The "g" constraint is special: split it into r, m and i, as there is
24605   no corresponding general constraint defined for the backend.
24606
24607 Here is the full list to map constraints that may involve
24608 gpr to h prefixed.
24609
24610 "g" -> "jrjmi"
24611 "r" -> "jr"
24612 "m" -> "jm"
24613 "<" -> "j<"
24614 ">" -> "j>"
24615 "o" -> "jo"
24616 "V" -> "jV"
24617 "p" -> "jp"
24618 "Bm" -> "ja"
24619*/
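/* For illustration (a hypothetical asm statement, not taken from this
   file): with APX EGPRs available but gpr32 not allowed in inline asm,

     asm ("add %1, %0" : "+rm" (x) : "g" (y));

   has its constraints rewritten to "+jrjm" and "jrjmi", restricting both
   operands to the lower 16 general registers.  */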
24620
24621static void map_egpr_constraints (vec<const char *> &constraints)
24622{
24623 for (size_t i = 0; i < constraints.length(); i++)
24624 {
24625 const char *cur = constraints[i];
24626
24627      if (startswith (cur, "=@cc"))
24628	continue;
24629
24630      int len = strlen (cur);
24631 auto_vec<char> buf;
24632
24633 for (int j = 0; j < len; j++)
24634 {
24635 switch (cur[j])
24636 {
24637 case 'g':
24638	      buf.safe_push ('j');
24639	      buf.safe_push ('r');
24640	      buf.safe_push ('j');
24641	      buf.safe_push ('m');
24642	      buf.safe_push ('i');
24643 break;
24644 case 'r':
24645 case 'm':
24646 case '<':
24647 case '>':
24648 case 'o':
24649 case 'V':
24650 case 'p':
24651	      buf.safe_push ('j');
24652	      buf.safe_push (cur[j]);
24653 break;
24654 case 'B':
24655 if (cur[j + 1] == 'm')
24656 {
24657		  buf.safe_push ('j');
24658		  buf.safe_push ('a');
24659 j++;
24660 }
24661 else
24662 {
24663		  buf.safe_push (cur[j]);
24664		  buf.safe_push (cur[j + 1]);
24665 j++;
24666 }
24667 break;
24668 case 'T':
24669 case 'Y':
24670 case 'W':
24671 case 'j':
24672	      buf.safe_push (cur[j]);
24673	      buf.safe_push (cur[j + 1]);
24674	      j++;
24675	      break;
24676	    default:
24677	      buf.safe_push (cur[j]);
24678 break;
24679 }
24680 }
24681      buf.safe_push ('\0');
24682 constraints[i] = xstrdup (buf.address ());
24683 }
24684}
24685
24686/* Worker function for TARGET_MD_ASM_ADJUST.
24687
24688 We implement asm flag outputs, and maintain source compatibility
24689 with the old cc0-based compiler. */
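/* For illustration (a hypothetical asm statement, not taken from this
   file): a flag output such as

     bool lt;
     asm ("cmp %2, %1" : "=@ccl" (lt) : "r" (a), "r" (b));

   is handled below by replacing the "=@ccl" operand with the flags
   register in CCGCmode and emitting an extraction of the LT condition
   into LT after the asm.  */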
24690
24691static rtx_insn *
24692ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
24693 vec<machine_mode> & /*input_modes*/,
24694 vec<const char *> &constraints, vec<rtx> &/*uses*/,
24695 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
24696 location_t loc)
24697{
24698 bool saw_asm_flag = false;
24699
24700 start_sequence ();
24701
24702 if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
24703 map_egpr_constraints (constraints);
24704
24705 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
24706 {
24707 const char *con = constraints[i];
24708      if (!startswith (con, "=@cc"))
24709 continue;
24710 con += 4;
24711      if (strchr (con, ',') != NULL)
24712 {
24713 error_at (loc, "alternatives not allowed in %<asm%> flag output");
24714 continue;
24715 }
24716
24717 bool invert = false;
24718 if (con[0] == 'n')
24719 invert = true, con++;
24720
24721 machine_mode mode = CCmode;
24722 rtx_code code = UNKNOWN;
24723
24724 switch (con[0])
24725 {
24726 case 'a':
24727 if (con[1] == 0)
24728 mode = CCAmode, code = EQ;
24729 else if (con[1] == 'e' && con[2] == 0)
24730 mode = CCCmode, code = NE;
24731 break;
24732 case 'b':
24733 if (con[1] == 0)
24734 mode = CCCmode, code = EQ;
24735 else if (con[1] == 'e' && con[2] == 0)
24736 mode = CCAmode, code = NE;
24737 break;
24738 case 'c':
24739 if (con[1] == 0)
24740 mode = CCCmode, code = EQ;
24741 break;
24742 case 'e':
24743 if (con[1] == 0)
24744 mode = CCZmode, code = EQ;
24745 break;
24746 case 'g':
24747 if (con[1] == 0)
24748 mode = CCGCmode, code = GT;
24749 else if (con[1] == 'e' && con[2] == 0)
24750 mode = CCGCmode, code = GE;
24751 break;
24752 case 'l':
24753 if (con[1] == 0)
24754 mode = CCGCmode, code = LT;
24755 else if (con[1] == 'e' && con[2] == 0)
24756 mode = CCGCmode, code = LE;
24757 break;
24758 case 'o':
24759 if (con[1] == 0)
24760 mode = CCOmode, code = EQ;
24761 break;
24762 case 'p':
24763 if (con[1] == 0)
24764 mode = CCPmode, code = EQ;
24765 break;
24766 case 's':
24767 if (con[1] == 0)
24768 mode = CCSmode, code = EQ;
24769 break;
24770 case 'z':
24771 if (con[1] == 0)
24772 mode = CCZmode, code = EQ;
24773 break;
24774 }
24775 if (code == UNKNOWN)
24776 {
24777 error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
24778 continue;
24779 }
24780 if (invert)
24781 code = reverse_condition (code);
24782
24783 rtx dest = outputs[i];
24784 if (!saw_asm_flag)
24785 {
24786 /* This is the first asm flag output. Here we put the flags
24787 register in as the real output and adjust the condition to
24788 allow it. */
24789 constraints[i] = "=Bf";
24790 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
24791 saw_asm_flag = true;
24792 }
24793 else
24794 {
24795 /* We don't need the flags register as output twice. */
24796 constraints[i] = "=X";
24797 outputs[i] = gen_rtx_SCRATCH (SImode);
24798 }
24799
24800 rtx x = gen_rtx_REG (mode, FLAGS_REG);
24801 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
24802
24803 machine_mode dest_mode = GET_MODE (dest);
24804 if (!SCALAR_INT_MODE_P (dest_mode))
24805 {
24806 error_at (loc, "invalid type for %<asm%> flag output");
24807 continue;
24808 }
24809
24810 if (dest_mode == QImode)
24811 emit_insn (gen_rtx_SET (dest, x));
24812 else
24813 {
24814 rtx reg = gen_reg_rtx (QImode);
24815 emit_insn (gen_rtx_SET (reg, x));
24816
24817 reg = convert_to_mode (dest_mode, reg, 1);
24818 emit_move_insn (dest, reg);
24819 }
24820 }
24821
24822 rtx_insn *seq = end_sequence ();
24823
24824 if (saw_asm_flag)
24825 return seq;
24826 else
24827 {
24828 /* If we had no asm flag outputs, clobber the flags. */
24829      clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
24830      SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
24831 return NULL;
24832 }
24833}
24834
24835/* Implements target vector targetm.asm.encode_section_info. */
24836
24837static void ATTRIBUTE_UNUSED
24838ix86_encode_section_info (tree decl, rtx rtl, int first)
24839{
24840 default_encode_section_info (decl, rtl, first);
24841
24842  if (ix86_in_large_data_p (decl))
24843 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24844}
24845
24846/* Worker function for REVERSE_CONDITION. */
24847
24848enum rtx_code
24849ix86_reverse_condition (enum rtx_code code, machine_mode mode)
24850{
24851 return (mode == CCFPmode
24852 ? reverse_condition_maybe_unordered (code)
24853 : reverse_condition (code));
24854}
24855
24856/* Output code to perform an x87 FP register move, from OPERANDS[1]
24857 to OPERANDS[0]. */
24858
24859const char *
24860output_387_reg_move (rtx_insn *insn, rtx *operands)
24861{
24862 if (REG_P (operands[0]))
24863 {
24864 if (REG_P (operands[1])
24865 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24866 {
24867 if (REGNO (operands[0]) == FIRST_STACK_REG)
24868	    return output_387_ffreep (operands, 0);
24869 return "fstp\t%y0";
24870 }
24871 if (STACK_TOP_P (operands[0]))
24872 return "fld%Z1\t%y1";
24873 return "fst\t%y0";
24874 }
24875 else if (MEM_P (operands[0]))
24876 {
24877 gcc_assert (REG_P (operands[1]));
24878 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24879 return "fstp%Z0\t%y0";
24880 else
24881 {
24882 /* There is no non-popping store to memory for XFmode.
24883 So if we need one, follow the store with a load. */
24884 if (GET_MODE (operands[0]) == XFmode)
24885 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
24886 else
24887 return "fst%Z0\t%y0";
24888 }
24889 }
24890 else
24891 gcc_unreachable();
24892}
24893#ifdef TARGET_SOLARIS
24894/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24895
24896static void
24897i386_solaris_elf_named_section (const char *name, unsigned int flags,
24898 tree decl)
24899{
24900 /* With Binutils 2.15, the "@unwind" marker must be specified on
24901 every occurrence of the ".eh_frame" section, not just the first
24902 one. */
24903 if (TARGET_64BIT
24904 && strcmp (name, ".eh_frame") == 0)
24905 {
24906 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24907 flags & SECTION_WRITE ? "aw" : "a");
24908 return;
24909 }
24910
24911#ifndef USE_GAS
24912 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
24913 {
24914 solaris_elf_asm_comdat_section (name, flags, decl);
24915 return;
24916 }
24917
24918 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
24919 SPARC assembler. One cannot mix single-letter flags and #exclude, so
24920 only emit the latter here. */
24921 if (flags & SECTION_EXCLUDE)
24922 {
24923 fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
24924 return;
24925 }
24926#endif
24927
24928 default_elf_asm_named_section (name, flags, decl);
24929}
24930#endif /* TARGET_SOLARIS */
24931
24932/* Return the mangling of TYPE if it is an extended fundamental type. */
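/* For example (illustrative): with the manglings below, a declaration
   like "void f (__bf16, _Float16, __float128);" is mangled as
   _Z1fDF16bDF16_g.  */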
24933
24934static const char *
24935ix86_mangle_type (const_tree type)
24936{
24937 type = TYPE_MAIN_VARIANT (type);
24938
24939 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24940 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24941 return NULL;
24942
24943 if (type == float128_type_node || type == float64x_type_node)
24944 return NULL;
24945
24946 switch (TYPE_MODE (type))
24947 {
24948 case E_BFmode:
24949 return "DF16b";
24950 case E_HFmode:
24951 /* _Float16 is "DF16_".
24952 Align with clang's decision in https://reviews.llvm.org/D33719. */
24953 return "DF16_";
24954 case E_TFmode:
24955 /* __float128 is "g". */
24956 return "g";
24957 case E_XFmode:
24958 /* "long double" or __float80 is "e". */
24959 return "e";
24960 default:
24961 return NULL;
24962 }
24963}
24964
24965/* Create C++ tinfo symbols for only conditionally available fundamental
24966 types. */
24967
24968static void
24969ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
24970{
24971 extern tree ix86_float16_type_node;
24972 extern tree ix86_bf16_type_node;
24973
24974 if (!TARGET_SSE2)
24975 {
24976 if (!float16_type_node)
24977 float16_type_node = ix86_float16_type_node;
24978 if (!bfloat16_type_node)
24979 bfloat16_type_node = ix86_bf16_type_node;
24980 callback (float16_type_node);
24981 callback (bfloat16_type_node);
24982 float16_type_node = NULL_TREE;
24983 bfloat16_type_node = NULL_TREE;
24984 }
24985}
24986
24987static GTY(()) tree ix86_tls_stack_chk_guard_decl;
24988
24989static tree
24990ix86_stack_protect_guard (void)
24991{
24992 if (TARGET_SSP_TLS_GUARD)
24993 {
24994 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
24995 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
24996 tree type = build_qualified_type (type_node, qual);
24997 tree t;
24998
24999 if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
25000 {
25001 t = ix86_tls_stack_chk_guard_decl;
25002
25003 if (t == NULL)
25004 {
25005 rtx x;
25006
25007 t = build_decl
25008 (UNKNOWN_LOCATION, VAR_DECL,
25009 get_identifier (ix86_stack_protector_guard_symbol_str),
25010 type);
25011 TREE_STATIC (t) = 1;
25012 TREE_PUBLIC (t) = 1;
25013 DECL_EXTERNAL (t) = 1;
25014 TREE_USED (t) = 1;
25015 TREE_THIS_VOLATILE (t) = 1;
25016 DECL_ARTIFICIAL (t) = 1;
25017 DECL_IGNORED_P (t) = 1;
25018
25019 /* Do not share RTL as the declaration is visible outside of
25020 current function. */
25021 x = DECL_RTL (t);
25022 RTX_FLAG (x, used) = 1;
25023
25024 ix86_tls_stack_chk_guard_decl = t;
25025 }
25026 }
25027 else
25028 {
25029 tree asptrtype = build_pointer_type (type);
25030
25031 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
25032 t = build2 (MEM_REF, asptrtype, t,
25033 build_int_cst (asptrtype, 0));
25034 TREE_THIS_VOLATILE (t) = 1;
25035 }
25036
25037 return t;
25038 }
25039
25040 return default_stack_protect_guard ();
25041}
25042
25043static bool
25044ix86_stack_protect_runtime_enabled_p (void)
25045{
25046 /* Naked functions should not enable stack protector. */
25047  return !ix86_function_naked (current_function_decl);
25048}
25049
25050/* For 32-bit code we can save PIC register setup by using
25051 __stack_chk_fail_local hidden function instead of calling
25052 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
25053 register, so it is better to call __stack_chk_fail directly. */
25054
25055static tree ATTRIBUTE_UNUSED
25056ix86_stack_protect_fail (void)
25057{
25058 return TARGET_64BIT
25059 ? default_external_stack_protect_fail ()
25060 : default_hidden_stack_protect_fail ();
25061}
25062
25063/* Select a format to encode pointers in exception handling data. CODE
25064 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
25065 true if the symbol may be affected by dynamic relocations.
25066
25067 ??? All x86 object file formats are capable of representing this.
25068 After all, the relocation needed is the same as for the call insn.
25069 Whether or not a particular assembler allows us to enter such, I
25070 guess we'll have to see. */
25071
25072int
25073asm_preferred_eh_data_format (int code, int global)
25074{
25075 /* PE-COFF is effectively always -fPIC because of the .reloc section. */
25076 if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
25077 {
25078 int type = DW_EH_PE_sdata8;
25079 if (ptr_mode == SImode
25080 || ix86_cmodel == CM_SMALL_PIC
25081 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
25082 type = DW_EH_PE_sdata4;
25083 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
25084 }
25085
25086 if (ix86_cmodel == CM_SMALL
25087 || (ix86_cmodel == CM_MEDIUM && code))
25088 return DW_EH_PE_udata4;
25089
25090 return DW_EH_PE_absptr;
25091}
25092
25093/* Implement targetm.vectorize.builtin_vectorization_cost. */
25094static int
25095ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
25096 tree vectype, int)
25097{
25098 bool fp = false;
25099 machine_mode mode = TImode;
25100 int index;
25101 if (vectype != NULL)
25102 {
25103 fp = FLOAT_TYPE_P (vectype);
25104 mode = TYPE_MODE (vectype);
25105 }
25106
25107 switch (type_of_cost)
25108 {
25109 case scalar_stmt:
25110 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
25111
25112 case scalar_load:
25113 /* load/store costs are relative to register move which is 2. Recompute
25114	 it to COSTS_N_INSNS so everything has the same base.  */
25115 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
25116 : ix86_cost->int_load [2]) / 2;
25117
25118 case scalar_store:
25119 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
25120 : ix86_cost->int_store [2]) / 2;
25121
25122 case vector_stmt:
25123 return ix86_vec_cost (mode,
25124			    fp ? ix86_cost->addss : ix86_cost->sse_op);
25125
25126 case vector_load:
25127 index = sse_store_index (mode);
25128 /* See PR82713 - we may end up being called on non-vector type. */
25129 if (index < 0)
25130 index = 2;
25131 return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
25132
25133 case vector_store:
25134 index = sse_store_index (mode);
25135 /* See PR82713 - we may end up being called on non-vector type. */
25136 if (index < 0)
25137 index = 2;
25138 return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
25139
25140 case vec_to_scalar:
25141 case scalar_to_vec:
25142      return ix86_vec_cost (mode, ix86_cost->sse_op);
25143
25144 /* We should have separate costs for unaligned loads and gather/scatter.
25145 Do that incrementally. */
25146 case unaligned_load:
25147 index = sse_store_index (mode);
25148 /* See PR82713 - we may end up being called on non-vector type. */
25149 if (index < 0)
25150 index = 2;
25151 return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
25152
25153 case unaligned_store:
25154 index = sse_store_index (mode);
25155 /* See PR82713 - we may end up being called on non-vector type. */
25156 if (index < 0)
25157 index = 2;
25158 return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
25159
25160 case vector_gather_load:
25161 return ix86_vec_cost (mode,
25162 COSTS_N_INSNS
25163 (ix86_cost->gather_static
25164 + ix86_cost->gather_per_elt
25165 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
25166
25167 case vector_scatter_store:
25168 return ix86_vec_cost (mode,
25169 COSTS_N_INSNS
25170 (ix86_cost->scatter_static
25171 + ix86_cost->scatter_per_elt
25172 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
25173
25174 case cond_branch_taken:
25175 return ix86_cost->cond_taken_branch_cost;
25176
25177 case cond_branch_not_taken:
25178 return ix86_cost->cond_not_taken_branch_cost;
25179
25180 case vec_perm:
25181      return ix86_vec_cost (mode, ix86_cost->sse_op);
25182
25183 case vec_promote_demote:
25184 if (fp)
25185	return vec_fp_conversion_cost (ix86_tune_cost, mode);
25186      return ix86_vec_cost (mode, ix86_cost->sse_op);
25187
25188 case vec_construct:
25189 {
25190	int n = TYPE_VECTOR_SUBPARTS (vectype);
25191 /* N - 1 element inserts into an SSE vector, the possible
25192 GPR -> XMM move is accounted for in add_stmt_cost. */
25193 if (GET_MODE_BITSIZE (mode) <= 128)
25194 return (n - 1) * ix86_cost->sse_op;
25195 /* One vinserti128 for combining two SSE vectors for AVX256. */
25196 else if (GET_MODE_BITSIZE (mode) == 256)
25197 return ((n - 2) * ix86_cost->sse_op
25198		  + ix86_vec_cost (mode, ix86_cost->sse_op));
25199 /* One vinserti64x4 and two vinserti128 for combining SSE
25200 and AVX256 vectors to AVX512. */
25201 else if (GET_MODE_BITSIZE (mode) == 512)
25202 {
25203 machine_mode half_mode
25204 = mode_for_vector (GET_MODE_INNER (mode),
25205 GET_MODE_NUNITS (mode) / 2).require ();
25206 return ((n - 4) * ix86_cost->sse_op
25207		    + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op)
25208		    + ix86_vec_cost (mode, ix86_cost->sse_op));
25209 }
25210 gcc_unreachable ();
25211 }
25212
25213 default:
25214 gcc_unreachable ();
25215 }
25216}
25217
25218
25219/* This function returns the calling abi specific va_list type node.
25220 It returns the FNDECL specific va_list type. */
25221
25222static tree
25223ix86_fn_abi_va_list (tree fndecl)
25224{
25225 if (!TARGET_64BIT)
25226 return va_list_type_node;
25227 gcc_assert (fndecl != NULL_TREE);
25228
25229  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
25230 return ms_va_list_type_node;
25231 else
25232 return sysv_va_list_type_node;
25233}
25234
25235/* Returns the canonical va_list type specified by TYPE. If there
25236   is no valid TYPE provided, it returns NULL_TREE.  */
25237
25238static tree
25239ix86_canonical_va_list_type (tree type)
25240{
25241 if (TARGET_64BIT)
25242 {
25243      if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
25244 return ms_va_list_type_node;
25245
25246 if ((TREE_CODE (type) == ARRAY_TYPE
25247 && integer_zerop (array_type_nelts_minus_one (type)))
25248 || POINTER_TYPE_P (type))
25249 {
25250 tree elem_type = TREE_TYPE (type);
25251 if (TREE_CODE (elem_type) == RECORD_TYPE
25252	      && lookup_attribute ("sysv_abi va_list",
25253 TYPE_ATTRIBUTES (elem_type)))
25254 return sysv_va_list_type_node;
25255 }
25256
25257 return NULL_TREE;
25258 }
25259
25260 return std_canonical_va_list_type (type);
25261}
25262
25263/* Iterate through the target-specific builtin types for va_list.
25264 IDX denotes the iterator, *PTREE is set to the result type of
25265 the va_list builtin, and *PNAME to its internal type.
25266 Returns zero if there is no element for this index, otherwise
25267 IDX should be increased upon the next call.
25268 Note, do not iterate a base builtin's name like __builtin_va_list.
25269 Used from c_common_nodes_and_builtins. */
25270
25271static int
25272ix86_enum_va_list (int idx, const char **pname, tree *ptree)
25273{
25274 if (TARGET_64BIT)
25275 {
25276 switch (idx)
25277 {
25278 default:
25279 break;
25280
25281 case 0:
25282 *ptree = ms_va_list_type_node;
25283 *pname = "__builtin_ms_va_list";
25284 return 1;
25285
25286 case 1:
25287 *ptree = sysv_va_list_type_node;
25288 *pname = "__builtin_sysv_va_list";
25289 return 1;
25290 }
25291 }
25292
25293 return 0;
25294}
25295
25296#undef TARGET_SCHED_DISPATCH
25297#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
25298#undef TARGET_SCHED_DISPATCH_DO
25299#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
25300#undef TARGET_SCHED_REASSOCIATION_WIDTH
25301#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
25302#undef TARGET_SCHED_REORDER
25303#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
25304#undef TARGET_SCHED_ADJUST_PRIORITY
25305#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
25306#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
25307#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
25308 ix86_dependencies_evaluation_hook
25309
25310
25311/* Implementation of reassociation_width target hook used by
25312 reassoc phase to identify parallelism level in reassociated
25313   tree.  The statement's tree_code is passed in OP.  The arguments'
25314   type is passed in MODE.  */
25315
25316static int
25317ix86_reassociation_width (unsigned int op, machine_mode mode)
25318{
25319 int width = 1;
25320 /* Vector part. */
25321 if (VECTOR_MODE_P (mode))
25322 {
25323 int div = 1;
25324 if (INTEGRAL_MODE_P (mode))
25325 width = ix86_cost->reassoc_vec_int;
25326 else if (FLOAT_MODE_P (mode))
25327 width = ix86_cost->reassoc_vec_fp;
25328
25329 if (width == 1)
25330 return 1;
25331
25332 /* Znver1-4 Integer vector instructions execute in FP unit
25333 and can execute 3 additions and one multiplication per cycle. */
25334 if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
25335 || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4)
25336 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
25337 return 1;
25338 /* Znver5 can do 2 integer multiplications per cycle with latency
25339 of 3. */
25340 if (ix86_tune == PROCESSOR_ZNVER5
25341 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
25342 width = 6;
25343
25344      /* Account for targets that split wide vectors into multiple parts.  */
25345 if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
25346 div = GET_MODE_BITSIZE (mode) / 256;
25347 else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
25348 div = GET_MODE_BITSIZE (mode) / 128;
25349 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
25350 div = GET_MODE_BITSIZE (mode) / 64;
25351 width = (width + div - 1) / div;
25352 }
25353 /* Scalar part. */
25354 else if (INTEGRAL_MODE_P (mode))
25355 width = ix86_cost->reassoc_int;
25356 else if (FLOAT_MODE_P (mode))
25357 width = ix86_cost->reassoc_fp;
25358
25359 /* Avoid using too many registers in 32bit mode. */
25360 if (!TARGET_64BIT && width > 2)
25361 width = 2;
25362 return width;
25363}
25364
25365/* ??? No autovectorization into MMX or 3DNOW until we can reliably
25366 place emms and femms instructions. */
25367
25368static machine_mode
25369ix86_preferred_simd_mode (scalar_mode mode)
25370{
25371 if (!TARGET_SSE)
25372 return word_mode;
25373
25374 switch (mode)
25375 {
25376 case E_QImode:
25377 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
25378 return V64QImode;
25379 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25380 return V32QImode;
25381 else
25382 return V16QImode;
25383
25384 case E_HImode:
25385 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
25386 return V32HImode;
25387 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25388 return V16HImode;
25389 else
25390 return V8HImode;
25391
25392 case E_SImode:
25393 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25394 return V16SImode;
25395 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25396 return V8SImode;
25397 else
25398 return V4SImode;
25399
25400 case E_DImode:
25401 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25402 return V8DImode;
25403 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25404 return V4DImode;
25405 else
25406 return V2DImode;
25407
25408 case E_HFmode:
25409 if (TARGET_AVX512FP16)
25410 {
25411 if (TARGET_AVX512VL)
25412 {
25413 if (TARGET_PREFER_AVX128)
25414 return V8HFmode;
25415 else if (TARGET_PREFER_AVX256)
25416 return V16HFmode;
25417 }
25418 return V32HFmode;
25419 }
25420 return word_mode;
25421
25422 case E_BFmode:
25423 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25424 return V32BFmode;
25425 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25426 return V16BFmode;
25427 else
25428 return V8BFmode;
25429
25430 case E_SFmode:
25431 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25432 return V16SFmode;
25433 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25434 return V8SFmode;
25435 else
25436 return V4SFmode;
25437
25438 case E_DFmode:
25439 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25440 return V8DFmode;
25441 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25442 return V4DFmode;
25443 else if (TARGET_SSE2)
25444 return V2DFmode;
25445 /* FALLTHRU */
25446
25447 default:
25448 return word_mode;
25449 }
25450}
25451
25452/* If AVX is enabled then try vectorizing with both 256bit and 128bit
25453 vectors. If AVX512F is enabled then try vectorizing with 512bit,
25454 256bit and 128bit vectors. */
25455
25456static unsigned int
25457ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
25458{
25459 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
25460 {
25461 modes->safe_push (V64QImode);
25462 modes->safe_push (V32QImode);
25463 modes->safe_push (V16QImode);
25464 }
25465 else if (TARGET_AVX512F && all)
25466 {
25467 modes->safe_push (V32QImode);
25468 modes->safe_push (V16QImode);
25469 modes->safe_push (V64QImode);
25470 }
25471 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
25472 {
25473 modes->safe_push (V32QImode);
25474 modes->safe_push (V16QImode);
25475 }
25476 else if (TARGET_AVX && all)
25477 {
25478 modes->safe_push (V16QImode);
25479 modes->safe_push (V32QImode);
25480 }
25481 else if (TARGET_SSE2)
25482 modes->safe_push (V16QImode);
25483
25484 if (TARGET_MMX_WITH_SSE)
25485 modes->safe_push (V8QImode);
25486
25487 if (TARGET_SSE2)
25488 modes->safe_push (V4QImode);
25489
25490 return 0;
25491}
25492
25493/* Implementation of targetm.vectorize.get_mask_mode.  */
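/* For example (illustrative): with AVX512F, V16SF comparisons use a
   16-bit kmask, so the mask mode is HImode; without AVX512VL, V8SF
   comparisons still produce a vector mask, so the mask mode is
   V8SImode.  */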
25494
25495static opt_machine_mode
25496ix86_get_mask_mode (machine_mode data_mode)
25497{
25498 unsigned vector_size = GET_MODE_SIZE (data_mode);
25499 unsigned nunits = GET_MODE_NUNITS (data_mode);
25500 unsigned elem_size = vector_size / nunits;
25501
25502 /* Scalar mask case. */
25503 if ((TARGET_AVX512F && vector_size == 64)
25504 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
25505 /* AVX512FP16 only supports vector comparison
25506 to kmask for _Float16. */
25507 || (TARGET_AVX512VL && TARGET_AVX512FP16
25508 && GET_MODE_INNER (data_mode) == E_HFmode)
25509 || (TARGET_AVX10_2 && GET_MODE_INNER (data_mode) == E_BFmode))
25510 {
25511 if (elem_size == 4
25512 || elem_size == 8
25513 || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
25514	return smallest_int_mode_for_size (nunits).require ();
25515 }
25516
25517 scalar_int_mode elem_mode
25518    = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT).require ();
25519
25520 gcc_assert (elem_size * nunits == vector_size);
25521
25522 return mode_for_vector (elem_mode, nunits);
25523}
25524
25525
25526
25527/* Return class of registers which could be used for pseudo of MODE
25528 and of class RCLASS for spilling instead of memory. Return NO_REGS
25529 if it is not possible or non-profitable. */
25530
25531/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
25532
25533static reg_class_t
25534ix86_spill_class (reg_class_t rclass, machine_mode mode)
25535{
25536 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
25537 && TARGET_SSE2
25538 && TARGET_INTER_UNIT_MOVES_TO_VEC
25539 && TARGET_INTER_UNIT_MOVES_FROM_VEC
25540 && (mode == SImode || (TARGET_64BIT && mode == DImode))
25541 && INTEGER_CLASS_P (rclass))
25542 return ALL_SSE_REGS;
25543 return NO_REGS;
25544}
25545
25546/* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
25547 but returns a lower bound. */
25548
25549static unsigned int
25550ix86_max_noce_ifcvt_seq_cost (edge e)
25551{
25552 bool predictable_p = predictable_edge_p (e);
25553 if (predictable_p)
25554 {
25555 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
25556 return param_max_rtl_if_conversion_predictable_cost;
25557 }
25558 else
25559 {
25560 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
25561 return param_max_rtl_if_conversion_unpredictable_cost;
25562 }
25563
25564  /* On modern machines with deeper pipelines, the penalty for a branch
25565     misprediction can be higher than before because more pipeline
25566     slots have to be reset.  Add the parameter br_mispredict_scale as a
25567     factor to describe the impact of resetting the pipeline.  */
25568
25569 return BRANCH_COST (true, predictable_p)
25570 * ix86_tune_cost->br_mispredict_scale;
25571}
25572
25573/* Return true if SEQ is a good candidate as a replacement for the
25574 if-convertible sequence described in IF_INFO. */
25575
25576static bool
25577ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
25578{
25579 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
25580 {
25581 int cmov_cnt = 0;
25582 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
25583 Maybe we should allow even more conditional moves as long as they
25584 are used far enough not to stall the CPU, or also consider
25585 IF_INFO->TEST_BB succ edge probabilities. */
25586 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
25587 {
25588 rtx set = single_set (insn);
25589 if (!set)
25590 continue;
25591 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
25592 continue;
25593 rtx src = SET_SRC (set);
25594 machine_mode mode = GET_MODE (src);
25595 if (GET_MODE_CLASS (mode) != MODE_INT
25596 && GET_MODE_CLASS (mode) != MODE_FLOAT)
25597 continue;
25598 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
25599 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
25600 continue;
25601 /* insn is CMOV or FCMOV. */
25602 if (++cmov_cnt > 1)
25603 return false;
25604 }
25605 }
25606
25607  /* Without TARGET_SSE4_1, movdfcc/movsfcc takes 3 instructions (pand,
25608     pandn and por) and could therefore fail the cost comparison.
25609     Increasing the branch cost would hurt performance for other modes,
25610     so instead add some extra preference for floating point ifcvt.  */
25611 if (!TARGET_SSE4_1 && if_info->x
25612 && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT
25613 && if_info->speed_p)
25614 {
25615 unsigned cost = seq_cost (seq, true);
25616
25617 if (cost <= if_info->original_cost)
25618 return true;
25619
25620 return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2));
25621 }
25622
25623 return default_noce_conversion_profitable_p (seq, if_info);
25624}
25625
25626/* x86-specific vector costs. */
25627class ix86_vector_costs : public vector_costs
25628{
25629public:
25630 ix86_vector_costs (vec_info *, bool);
25631
25632 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
25633 stmt_vec_info stmt_info, slp_tree node,
25634 tree vectype, int misalign,
25635 vect_cost_model_location where) override;
25636 void finish_cost (const vector_costs *) override;
25637
25638private:
25639
25640 /* Estimate register pressure of the vectorized code. */
25641 void ix86_vect_estimate_reg_pressure ();
25642 /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer, it's used for
25643 estimation of register pressure.
25644 ??? Currently it's only used by vec_construct/scalar_to_vec
25645 where we know it's not loaded from memory. */
25646 unsigned m_num_gpr_needed[3];
25647 unsigned m_num_sse_needed[3];
25648 /* Number of 256-bit vector permutation. */
25649 unsigned m_num_avx256_vec_perm[3];
25650};
25651
25652ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
25653 : vector_costs (vinfo, costing_for_scalar),
25654 m_num_gpr_needed (),
25655 m_num_sse_needed (),
25656 m_num_avx256_vec_perm ()
25657{
25658}
25659
25660/* Implement targetm.vectorize.create_costs. */
25661
25662static vector_costs *
25663ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
25664{
25665 return new ix86_vector_costs (vinfo, costing_for_scalar);
25666}
25667
25668unsigned
25669ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
25670 stmt_vec_info stmt_info, slp_tree node,
25671 tree vectype, int misalign,
25672 vect_cost_model_location where)
25673{
25674 unsigned retval = 0;
25675 bool scalar_p
25676 = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
25677 int stmt_cost = - 1;
25678
25679 bool fp = false;
25680 machine_mode mode = scalar_p ? SImode : TImode;
25681
25682 if (vectype != NULL)
25683 {
25684 fp = FLOAT_TYPE_P (vectype);
25685 mode = TYPE_MODE (vectype);
25686 if (scalar_p)
25687 mode = TYPE_MODE (TREE_TYPE (vectype));
25688 }
25689
25690 if ((kind == vector_stmt || kind == scalar_stmt)
25691 && stmt_info
25692      && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
25693 {
25694      tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
25695 /*machine_mode inner_mode = mode;
25696 if (VECTOR_MODE_P (mode))
25697 inner_mode = GET_MODE_INNER (mode);*/
25698
25699 switch (subcode)
25700 {
25701 case PLUS_EXPR:
25702 case POINTER_PLUS_EXPR:
25703 case MINUS_EXPR:
25704 if (kind == scalar_stmt)
25705 {
25706 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
25707 stmt_cost = ix86_cost->addss;
25708 else if (X87_FLOAT_MODE_P (mode))
25709 stmt_cost = ix86_cost->fadd;
25710 else
25711 stmt_cost = ix86_cost->add;
25712 }
25713 else
25714	    stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
25715 : ix86_cost->sse_op);
25716 break;
25717
25718 case MULT_EXPR:
25719 /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
25720 take it as MULT_EXPR. */
25721 case MULT_HIGHPART_EXPR:
25722	  stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
25723 break;
25724 /* There's no direct instruction for WIDEN_MULT_EXPR,
25725 take emulation into account. */
25726 case WIDEN_MULT_EXPR:
25727	  stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
25728 TYPE_UNSIGNED (vectype));
25729 break;
25730
25731 case NEGATE_EXPR:
25732 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
25733 stmt_cost = ix86_cost->sse_op;
25734 else if (X87_FLOAT_MODE_P (mode))
25735 stmt_cost = ix86_cost->fchs;
25736 else if (VECTOR_MODE_P (mode))
25737	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
25738 else
25739 stmt_cost = ix86_cost->add;
25740 break;
25741 case TRUNC_DIV_EXPR:
25742 case CEIL_DIV_EXPR:
25743 case FLOOR_DIV_EXPR:
25744 case ROUND_DIV_EXPR:
25745 case TRUNC_MOD_EXPR:
25746 case CEIL_MOD_EXPR:
25747 case FLOOR_MOD_EXPR:
25748 case RDIV_EXPR:
25749 case ROUND_MOD_EXPR:
25750 case EXACT_DIV_EXPR:
25751	  stmt_cost = ix86_division_cost (ix86_cost, mode);
25752 break;
25753
25754 case RSHIFT_EXPR:
25755 case LSHIFT_EXPR:
25756 case LROTATE_EXPR:
25757 case RROTATE_EXPR:
25758 {
25759	    tree op1 = gimple_assign_rhs1 (stmt_info->stmt);
25760	    tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
25761	    stmt_cost = ix86_shift_rotate_cost
25762			   (ix86_cost,
25763			    (subcode == RSHIFT_EXPR
25764			     && !TYPE_UNSIGNED (TREE_TYPE (op1)))
25765			    ? ASHIFTRT : LSHIFTRT, mode,
25766			    TREE_CODE (op2) == INTEGER_CST,
25767			    cst_and_fits_in_hwi (op2)
25768			    ? int_cst_value (op2) : -1,
25769			    false, false, NULL, NULL);
25770 }
25771 break;
25772 case NOP_EXPR:
25773 /* Only sign-conversions are free. */
25774 if (tree_nop_conversion_p
25775 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
25776 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
25777 stmt_cost = 0;
25778 else if (fp)
25779 stmt_cost = vec_fp_conversion_cost
25780 (ix86_tune_cost, GET_MODE_BITSIZE (mode));
25781 break;
25782
25783 case FLOAT_EXPR:
25784 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
25785 stmt_cost = ix86_cost->cvtsi2ss;
25786 else if (X87_FLOAT_MODE_P (mode))
25787 /* TODO: We do not have cost tables for x87. */
25788 stmt_cost = ix86_cost->fadd;
25789 else
25790 stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
25791 break;
25792
25793 case FIX_TRUNC_EXPR:
25794 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
25795 stmt_cost = ix86_cost->cvtss2si;
25796 else if (X87_FLOAT_MODE_P (mode))
25797 /* TODO: We do not have cost tables for x87. */
25798 stmt_cost = ix86_cost->fadd;
25799 else
25800 stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
25801 break;
25802
25803 case COND_EXPR:
25804 {
25805 /* SSE2 conditional move sequence is:
25806 pcmpgtd %xmm5, %xmm0 (accounted separately)
25807 pand %xmm0, %xmm2
25808 pandn %xmm1, %xmm0
25809 por %xmm2, %xmm0
25810 while SSE4 uses cmp + blend
25811 and AVX512 masked moves.
25812
25813 The condition is accounted separately since we usually have
25814 p = a < b
25815 c = p ? x : y
25816 and we will account the first statement as setcc.  The exception is when
25817 p is loaded from memory as bool; then we will not account
25818 the compare, but there is no way to check for this.  */
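	 /* Purely illustrative: with SSE4.1 the same select collapses to the
	    compare plus a single variable blend (blendvps/pblendvb with the
	    mask in %xmm0), which is why NINSNS below is 1 for
	    TARGET_SSE4_1.  */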
25819
25820 int ninsns = TARGET_SSE4_1 ? 1 : 3;
25821
25822 /* If one of parameters is 0 or -1 the sequence will be simplified:
25823 (if_true & mask) | (if_false & ~mask) -> if_true & mask */
25824 if (ninsns > 1
25825 && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
25826 || zerop (gimple_assign_rhs3 (stmt_info->stmt))
25827 || integer_minus_onep
25828 (gimple_assign_rhs2 (stmt_info->stmt))
25829 || integer_minus_onep
25830 (gimple_assign_rhs3 (stmt_info->stmt))))
25831 ninsns = 1;
25832
25833 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
25834 stmt_cost = ninsns * ix86_cost->sse_op;
25835 else if (X87_FLOAT_MODE_P (mode))
25836 /* x87 requires conditional branch. We don't have cost for
25837 that. */
25838 ;
25839 else if (VECTOR_MODE_P (mode))
25840 stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
25841 else
25842 /* compare (accounted separately) + cmov. */
25843 stmt_cost = ix86_cost->add;
25844 }
25845 break;
25846
25847 case MIN_EXPR:
25848 case MAX_EXPR:
25849 if (fp)
25850 {
25851 if (X87_FLOAT_MODE_P (mode)
25852 && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
25853 /* x87 requires conditional branch. We don't have cost for
25854 that. */
25855 ;
25856 else
25857 /* minss */
25858 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
25859 }
25860 else
25861 {
25862 if (VECTOR_MODE_P (mode))
25863 {
25864 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
25865 /* vpmin was introduced in SSE3.
25866 SSE2 needs pcmpgtd + pand + pandn + pxor.
25867 If one of parameters is 0 or -1 the sequence is simplified
25868 to pcmpgtd + pand. */
25869 if (!TARGET_SSSE3)
25870 {
25871 if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
25872 || integer_minus_onep
25873 (gimple_assign_rhs2 (stmt_info->stmt)))
25874 stmt_cost *= 2;
25875 else
25876 stmt_cost *= 4;
25877 }
25878 }
25879 else
25880 /* cmp + cmov. */
25881 stmt_cost = ix86_cost->add * 2;
25882 }
25883 break;
25884
25885 case ABS_EXPR:
25886 case ABSU_EXPR:
25887 if (fp)
25888 {
25889 if (X87_FLOAT_MODE_P (mode)
25890 && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
25891 /* fabs. */
25892 stmt_cost = ix86_cost->fabs;
25893 else
25894 /* andss of sign bit. */
25895 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
25896 }
25897 else
25898 {
25899 if (VECTOR_MODE_P (mode))
25900 {
25901 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
25902 /* vabs was introduced in SSE3.
25903 SSE3 uses psrat + pxor + psub. */
25904 if (!TARGET_SSSE3)
25905 stmt_cost *= 3;
25906 }
25907 else
25908 /* neg + cmov. */
25909 stmt_cost = ix86_cost->add * 2;
25910 }
25911 break;
25912
25913 case BIT_IOR_EXPR:
25914 case BIT_XOR_EXPR:
25915 case BIT_AND_EXPR:
25916 case BIT_NOT_EXPR:
25917 gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
25918 && !X87_FLOAT_MODE_P (mode));
25919 if (VECTOR_MODE_P (mode))
25920 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
25921 else
25922 stmt_cost = ix86_cost->add;
25923 break;
25924
25925 default:
25926 if (truth_value_p (subcode))
25927 {
25928 if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
25929 /* CMPccS? instructions are cheap, so use sse_op.  While they
25930 produce a mask which may need to be turned to 0/1 by and,
25931 expect that this will be optimized away in a common case. */
25932 stmt_cost = ix86_cost->sse_op;
25933 else if (X87_FLOAT_MODE_P (mode))
25934 /* fcmp + setcc. */
25935 stmt_cost = ix86_cost->fadd + ix86_cost->add;
25936 else if (VECTOR_MODE_P (mode))
25937 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
25938 else
25939 /* setcc. */
25940 stmt_cost = ix86_cost->add;
25941 break;
25942 }
25943 break;
25944 }
25945 }
25946
25947 combined_fn cfn;
25948 if ((kind == vector_stmt || kind == scalar_stmt)
25949 && stmt_info
25950 && stmt_info->stmt
25951 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
25952 switch (cfn)
25953 {
25954 case CFN_FMA:
25955 stmt_cost = ix86_vec_cost (mode,
25956 mode == SFmode ? ix86_cost->fmass
25957 : ix86_cost->fmasd);
25958 break;
25959 case CFN_MULH:
25960 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
25961 break;
25962 default:
25963 break;
25964 }
25965
25966 if (kind == vec_promote_demote)
25967 {
25968 int outer_size
25969 = tree_to_uhwi
25970 (TYPE_SIZE
25971 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
25972 int inner_size
25973 = tree_to_uhwi
25974 (TYPE_SIZE
25975 (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
25976 bool inner_fp = FLOAT_TYPE_P
25977 (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
25978
25979 if (fp && inner_fp)
25980 stmt_cost = vec_fp_conversion_cost
25981 (ix86_tune_cost, GET_MODE_BITSIZE (mode));
25982 else if (fp && !inner_fp)
25983 stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
25984 else if (!fp && inner_fp)
25985 stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
25986 else
25987 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
25988 /* VEC_PACK_TRUNC_EXPR and similar demote operations: if the inner
25989 (source) size is greater than the outer size we will end up doing two
25990 conversions and packing them.  We always pack pairs; if the size
25991 difference is greater it is split into multiple demote operations.  */
25992 if (inner_size > outer_size)
25993 stmt_cost = stmt_cost * 2
25994 + ix86_vec_cost (mode, ix86_cost->sse_op);
25995 }
25996
25997 /* If we do elementwise loads into a vector then we are bound by
25998 latency and execution resources for the many scalar loads
25999 (AGU and load ports). Try to account for this by scaling the
26000 construction cost by the number of elements involved. */
26001 if ((kind == vec_construct || kind == vec_to_scalar)
26002 && ((stmt_info
26003 && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
26004 || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
26005 && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
26006 && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
26007 != INTEGER_CST))
26008 || (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)
26009 == VMAT_GATHER_SCATTER)))
26010 || (node
26011 && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
26012 || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP
26013 && SLP_TREE_LANES (node) == 1))
26014 && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
26015 (SLP_TREE_REPRESENTATIVE (node))))
26016 != INTEGER_CST))
26017 || (SLP_TREE_MEMORY_ACCESS_TYPE (node)
26018 == VMAT_GATHER_SCATTER)))))
26019 {
26020 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
26021 stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
26022 }
26023 else if ((kind == vec_construct || kind == scalar_to_vec)
26024 && node
26025 && SLP_TREE_DEF_TYPE (node) == vect_external_def)
26026 {
26027 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
26028 unsigned i;
26029 tree op;
26030 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
26031 if (TREE_CODE (op) == SSA_NAME)
26032 TREE_VISITED (op) = 0;
26033 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
26034 {
26035 if (TREE_CODE (op) != SSA_NAME
26036 || TREE_VISITED (op))
26037 continue;
26038 TREE_VISITED (op) = 1;
26039 gimple *def = SSA_NAME_DEF_STMT (op);
26040 tree tem;
26041 if (is_gimple_assign (def)
26042 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
26043 && ((tem = gimple_assign_rhs1 (def)), true)
26044 && TREE_CODE (tem) == SSA_NAME
26045 /* A sign-change expands to nothing. */
26046 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)),
26047 TREE_TYPE (tem)))
26048 def = SSA_NAME_DEF_STMT (tem);
26049 /* When the component is loaded from memory we can directly
26050 move it to a vector register, otherwise we have to go
26051 via a GPR or via vpinsr which involves similar cost.
26052 Likewise with a BIT_FIELD_REF extracting from a vector
26053 register we can hope to avoid using a GPR. */
26054 if (!is_gimple_assign (def)
26055 || ((!gimple_assign_load_p (def)
26056 || (!TARGET_SSE4_1
26057 && GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) == 1))
26058 && (gimple_assign_rhs_code (def) != BIT_FIELD_REF
26059 || !VECTOR_TYPE_P (TREE_TYPE
26060 (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
26061 {
26062 if (fp)
26063 m_num_sse_needed[where]++;
26064 else
26065 {
26066 m_num_gpr_needed[where]++;
26067
26068 int cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
26069
26070 /* For integer construction, the number of actual GPR -> XMM
26071 moves will be somewhere between 0 and n.
26072 We do not have a very good idea of the actual number, since
26073 the source may be a constant, memory or a chain of
26074 instructions that will be converted later by the
26075 scalar-to-vector pass.  */
26076 if (kind == vec_construct
26077 && GET_MODE_BITSIZE (mode) == 256)
26078 cost *= 2;
26079 else if (kind == vec_construct
26080 && GET_MODE_BITSIZE (mode) == 512)
26081 cost *= 3;
26082 stmt_cost += cost;
26083 }
26084 }
26085 }
26086 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
26087 if (TREE_CODE (op) == SSA_NAME)
26088 TREE_VISITED (op) = 0;
26089 }
26090 if (stmt_cost == -1)
26091 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
26092
26093 if (kind == vec_perm && vectype
26094 && GET_MODE_SIZE (TYPE_MODE (vectype)) == 32)
26095 m_num_avx256_vec_perm[where]++;
26096
26097 /* Penalize DFmode vector operations for Bonnell. */
26098 if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
26099 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
26100 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
26101
26102 /* Statements in an inner loop relative to the loop being
26103 vectorized are weighted more heavily. The value here is
26104 arbitrary and could potentially be improved with analysis. */
26105 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
26106
26107 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
26108 for Silvermont, as it has an out-of-order integer pipeline that can
26109 execute 2 scalar instructions per tick but an in-order SIMD pipeline.  */
26110 if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
26111 || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
26112 && stmt_info && stmt_info->stmt)
26113 {
26114 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
26115 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
26116 retval = (retval * 17) / 10;
26117 }
26118
26119 m_costs[where] += retval;
26120
26121 return retval;
26122}
26123
26124void
26125ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
26126{
26127 unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
26128 unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;
26129
26130 /* Is there a better way to get the number of available FP registers?  Currently we use the SSE register count (SSE_REGS).  */
26131 unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
26132 for (unsigned i = 0; i != 3; i++)
26133 {
26134 if (m_num_gpr_needed[i] > target_avail_regs)
26135 m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs);
26136 /* Only measure SSE register pressure.  */
26137 if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
26138 m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse);
26139 }
26140}
26141
26142void
26143ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
26144{
26145 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
26146 if (loop_vinfo && !m_costing_for_scalar)
26147 {
26148 /* We are currently not asking the vectorizer to compare costs
26149 between different vector mode sizes. When using predication
26150 that will end up always choosing the preferred mode size even
26151 if there's a smaller mode covering all lanes. Test for this
26152 situation and artificially reject the larger mode attempt.
26153 ??? We currently lack masked ops for sub-SSE sized modes,
26154 so we could restrict this rejection to AVX and AVX512 modes
26155 but err on the safe side for now.  */
26156 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
26157 && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26158 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
26159 && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
26160 > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
26161 m_costs[vect_body] = INT_MAX;
26162 }
26163
26164 ix86_vect_estimate_reg_pressure ();
26165
26166 for (int i = 0; i != 3; i++)
26167 if (m_num_avx256_vec_perm[i]
26168 && TARGET_AVX256_AVOID_VEC_PERM)
26169 m_costs[i] = INT_MAX;
26170
26171 /* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled, arrange for both
26172 an AVX2 and an SSE epilogue for AVX512 vectorized loops.  */
26173 if (loop_vinfo
26174 && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26175 && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
26176 && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
26177 m_suggested_epilogue_mode = V16QImode;
26178 /* When a 128-bit SSE vectorized epilogue still has a VF of 16 or larger,
26179 enable a 64-bit SSE epilogue.  */
26180 if (loop_vinfo
26181 && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
26182 && GET_MODE_SIZE (loop_vinfo->vector_mode) == 16
26183 && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
26184 m_suggested_epilogue_mode = V8QImode;
26185
26186 vector_costs::finish_cost (scalar_costs);
26187}
26188
26189/* Validate target specific memory model bits in VAL. */
26190
26191static unsigned HOST_WIDE_INT
26192ix86_memmodel_check (unsigned HOST_WIDE_INT val)
26193{
26194 enum memmodel model = memmodel_from_int (val);
26195 bool strong;
26196
26197 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
26198 |MEMMODEL_MASK)
26199 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
26200 {
26201 warning (OPT_Winvalid_memory_model,
26202 "unknown architecture specific memory model");
26203 return MEMMODEL_SEQ_CST;
26204 }
26205 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
26206 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
26207 {
26208 warning (OPT_Winvalid_memory_model,
26209 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
26210 "memory model");
26211 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
26212 }
26213 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
26214 {
26215 warning (OPT_Winvalid_memory_model,
26216 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
26217 "memory model");
26218 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
26219 }
26220 return val;
26221}
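/* Illustrative example (user-level view, not code from this file): a call
   such as
     __atomic_fetch_add (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
   reaches this hook with IX86_HLE_ACQUIRE set on top of MEMMODEL_ACQUIRE
   and is accepted unchanged, while pairing __ATOMIC_HLE_ACQUIRE with a
   plain __ATOMIC_RELAXED model would hit the HLE_ACQUIRE warning above.  */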
26222
26223/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
26224 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
26225 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
26226 or number of vecsize_mangle variants that should be emitted. */
26227
26228static int
26229ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
26230 struct cgraph_simd_clone *clonei,
26231 tree base_type, int num,
26232 bool explicit_p)
26233{
26234 int ret = 1;
26235
26236 if (clonei->simdlen
26237 && (clonei->simdlen < 2
26238 || clonei->simdlen > 1024
26239 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
26240 {
26241 if (explicit_p)
26242 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
26243 "unsupported simdlen %wd", clonei->simdlen.to_constant ());
26244 return 0;
26245 }
26246
26247 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
26248 if (TREE_CODE (ret_type) != VOID_TYPE)
26249 switch (TYPE_MODE (ret_type))
26250 {
26251 case E_QImode:
26252 case E_HImode:
26253 case E_SImode:
26254 case E_DImode:
26255 case E_SFmode:
26256 case E_DFmode:
26257 /* case E_SCmode: */
26258 /* case E_DCmode: */
26259 if (!AGGREGATE_TYPE_P (ret_type))
26260 break;
26261 /* FALLTHRU */
26262 default:
26263 if (explicit_p)
26264 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
26265 "unsupported return type %qT for simd", ret_type);
26266 return 0;
26267 }
26268
26269 tree t;
26270 int i;
26271 tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
26272 bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
26273
26274 for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
26275 t && t != void_list_node; t = TREE_CHAIN (t), i++)
26276 {
26277 tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
26278 switch (TYPE_MODE (arg_type))
26279 {
26280 case E_QImode:
26281 case E_HImode:
26282 case E_SImode:
26283 case E_DImode:
26284 case E_SFmode:
26285 case E_DFmode:
26286 /* case E_SCmode: */
26287 /* case E_DCmode: */
26288 if (!AGGREGATE_TYPE_P (arg_type))
26289 break;
26290 /* FALLTHRU */
26291 default:
26292 if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
26293 break;
26294 if (explicit_p)
26295 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
26296 "unsupported argument type %qT for simd", arg_type);
26297 return 0;
26298 }
26299 }
26300
26301 if (!TREE_PUBLIC (node->decl) || !explicit_p)
26302 {
26303 /* If the function isn't exported, we can pick up just one ISA
26304 for the clones. */
26305 if (TARGET_AVX512F)
26306 clonei->vecsize_mangle = 'e';
26307 else if (TARGET_AVX2)
26308 clonei->vecsize_mangle = 'd';
26309 else if (TARGET_AVX)
26310 clonei->vecsize_mangle = 'c';
26311 else
26312 clonei->vecsize_mangle = 'b';
26313 ret = 1;
26314 }
26315 else
26316 {
26317 clonei->vecsize_mangle = "bcde"[num];
26318 ret = 4;
26319 }
26320 clonei->mask_mode = VOIDmode;
26321 switch (clonei->vecsize_mangle)
26322 {
26323 case 'b':
26324 clonei->vecsize_int = 128;
26325 clonei->vecsize_float = 128;
26326 break;
26327 case 'c':
26328 clonei->vecsize_int = 128;
26329 clonei->vecsize_float = 256;
26330 break;
26331 case 'd':
26332 clonei->vecsize_int = 256;
26333 clonei->vecsize_float = 256;
26334 break;
26335 case 'e':
26336 clonei->vecsize_int = 512;
26337 clonei->vecsize_float = 512;
26338 if (TYPE_MODE (base_type) == QImode)
26339 clonei->mask_mode = DImode;
26340 else
26341 clonei->mask_mode = SImode;
26342 break;
26343 }
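  /* Worked example (illustrative) for the defaulting below: with base_type
     float and vecsize_mangle 'd' (AVX2) vecsize_float is 256, so simdlen
     defaults to 256 / 32 = 8 lanes; with base_type double and mangle 'b'
     (SSE) it is 128 / 64 = 2.  */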
26344 if (clonei->simdlen == 0)
26345 {
26346 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
26347 clonei->simdlen = clonei->vecsize_int;
26348 else
26349 clonei->simdlen = clonei->vecsize_float;
26350 clonei->simdlen = clonei->simdlen
26351 / GET_MODE_BITSIZE (TYPE_MODE (base_type));
26352 }
26353 else if (clonei->simdlen > 16)
26354 {
26355 /* For compatibility with ICC, use the same upper bounds
26356 for simdlen. In particular, for CTYPE below, use the return type,
26357 unless the function returns void, in that case use the characteristic
26358 type. If it is possible for given SIMDLEN to pass CTYPE value
26359 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
26360 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
26361 emit corresponding clone. */
26362 tree ctype = ret_type;
26363 if (VOID_TYPE_P (ret_type))
26364 ctype = base_type;
26365 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
26366 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
26367 cnt /= clonei->vecsize_int;
26368 else
26369 cnt /= clonei->vecsize_float;
26370 if (cnt > (TARGET_64BIT ? 16 : 8))
26371 {
26372 if (explicit_p)
26373 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
26374 "unsupported simdlen %wd",
26375 clonei->simdlen.to_constant ());
26376 return 0;
26377 }
26378 }
26379 return ret;
26380}
26381
26382/* If SIMD clone NODE can't be used in a vectorized loop
26383 in current function, return -1, otherwise return a badness of using it
26384 (0 if it is most desirable from vecsize_mangle point of view, 1
26385 slightly less desirable, etc.). */
26386
26387static int
26388ix86_simd_clone_usable (struct cgraph_node *node, machine_mode)
26389{
26390 switch (node->simdclone->vecsize_mangle)
26391 {
26392 case 'b':
26393 if (!TARGET_SSE2)
26394 return -1;
26395 if (!TARGET_AVX)
26396 return 0;
26397 return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
26398 case 'c':
26399 if (!TARGET_AVX)
26400 return -1;
26401 return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
26402 case 'd':
26403 if (!TARGET_AVX2)
26404 return -1;
26405 return TARGET_AVX512F ? 1 : 0;
26406 case 'e':
26407 if (!TARGET_AVX512F)
26408 return -1;
26409 return 0;
26410 default:
26411 gcc_unreachable ();
26412 }
26413}
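/* For example (illustrative): on a plain AVX2 target an 'e' (AVX512F) clone
   is unusable (-1), a 'd' (AVX2) clone gets badness 0, a 'c' (AVX) clone 1
   and a 'b' (SSE2) clone 2, so the widest clone the host supports wins.  */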
26414
26415/* This function adjusts the unroll factor based on
26416 the hardware capabilities.  For example, bdver3 has
26417 a loop buffer which makes unrolling of smaller
26418 loops less important.  This function decides the
26419 unroll factor using the number of memory references
26420 (a limit of 32 is used) as a heuristic.  */
26421
26422static unsigned
26423ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
26424{
26425 basic_block *bbs;
26426 rtx_insn *insn;
26427 unsigned i;
26428 unsigned mem_count = 0;
26429
26430 /* Unroll small loops when the unroll factor is not explicitly
26431 specified.  */
26432 if (ix86_unroll_only_small_loops && !loop->unroll)
26433 {
26434 if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
26435 return MIN (nunroll, ix86_cost->small_unroll_factor);
26436 else
26437 return 1;
26438 }
26439
26440 if (!TARGET_ADJUST_UNROLL)
26441 return nunroll;
26442
26443 /* Count the number of memory references within the loop body.
26444 This value determines the unrolling factor for bdver3 and bdver4
26445 architectures. */
26446 subrtx_iterator::array_type array;
26447 bbs = get_loop_body (loop);
26448 for (i = 0; i < loop->num_nodes; i++)
26449 FOR_BB_INSNS (bbs[i], insn)
26450 if (NONDEBUG_INSN_P (insn))
26451 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
26452 if (const_rtx x = *iter)
26453 if (MEM_P (x))
26454 {
26455 machine_mode mode = GET_MODE (x);
26456 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
26457 if (n_words > 4)
26458 mem_count += 2;
26459 else
26460 mem_count += 1;
26461 }
26462 free (bbs);
26463
26464 if (mem_count && mem_count <= 32)
26465 return MIN (nunroll, 32 / mem_count);
26466
26467 return nunroll;
26468}
26469
26470
26471/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
26472
26473static bool
26474ix86_float_exceptions_rounding_supported_p (void)
26475{
26476 /* For x87 floating point with standard excess precision handling,
26477 there is no adddf3 pattern (since x87 floating point only has
26478 XFmode operations) so the default hook implementation gets this
26479 wrong. */
26480 return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
26481}
26482
26483/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
26484
26485static void
26486ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
26487{
26488 if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
26489 return;
26490 tree exceptions_var = create_tmp_var_raw (integer_type_node);
26491 if (TARGET_80387)
26492 {
26493 tree fenv_index_type = build_index_type (size_int (6));
26494 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
26495 tree fenv_var = create_tmp_var_raw (fenv_type);
26496 TREE_ADDRESSABLE (fenv_var) = 1;
26497 tree fenv_ptr = build_pointer_type (fenv_type);
26498 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
26499 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
26500 tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
26501 tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
26502 tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
26503 tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
26504 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
26505 tree hold_fnclex = build_call_expr (fnclex, 0);
26506 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
26507 NULL_TREE, NULL_TREE);
26508 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
26509 hold_fnclex);
26510 *clear = build_call_expr (fnclex, 0);
26511 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
26512 tree fnstsw_call = build_call_expr (fnstsw, 0);
26513 tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
26514 fnstsw_call, NULL_TREE, NULL_TREE);
26515 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
26516 tree update_mod = build4 (TARGET_EXPR, integer_type_node,
26517 exceptions_var, exceptions_x87,
26518 NULL_TREE, NULL_TREE);
26519 *update = build2 (COMPOUND_EXPR, integer_type_node,
26520 sw_mod, update_mod);
26521 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
26522 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
26523 }
26524 if (TARGET_SSE && TARGET_SSE_MATH)
26525 {
26526 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
26527 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
26528 tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
26529 tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
26530 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
26531 tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
26532 mxcsr_orig_var, stmxcsr_hold_call,
26533 NULL_TREE, NULL_TREE);
26534 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
26535 mxcsr_orig_var,
26536 build_int_cst (unsigned_type_node, 0x1f80));
26537 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
26538 build_int_cst (unsigned_type_node, 0xffffffc0));
26539 tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
26540 mxcsr_mod_var, hold_mod_val,
26541 NULL_TREE, NULL_TREE);
26542 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
26543 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
26544 hold_assign_orig, hold_assign_mod);
26545 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
26546 ldmxcsr_hold_call);
26547 if (*hold)
26548 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
26549 else
26550 *hold = hold_all;
26551 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
26552 if (*clear)
26553 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
26554 ldmxcsr_clear_call);
26555 else
26556 *clear = ldmxcsr_clear_call;
26557 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
26558 tree exceptions_sse = fold_convert (integer_type_node,
26559 stxmcsr_update_call);
26560 if (*update)
26561 {
26562 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
26563 exceptions_var, exceptions_sse);
26564 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
26565 exceptions_var, exceptions_mod);
26566 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
26567 exceptions_assign);
26568 }
26569 else
26570 *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
26571 exceptions_sse, NULL_TREE, NULL_TREE);
26572 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
26573 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
26574 ldmxcsr_update_call);
26575 }
26576 tree atomic_feraiseexcept
26577 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
26578 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
26579 1, exceptions_var);
26580 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
26581 atomic_feraiseexcept_call);
26582}
26583
26584#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
26585/* For i386, a common symbol is local only for non-PIE binaries.  For
26586 x86-64, a common symbol is local only for non-PIE binaries or when the
26587 linker supports copy relocations in PIE binaries.  */
26588
26589static bool
26590ix86_binds_local_p (const_tree exp)
26591{
26592 bool direct_extern_access
26593 = (ix86_direct_extern_access
26594 && !(VAR_OR_FUNCTION_DECL_P (exp)
26595 && lookup_attribute ("nodirect_extern_access",
26596 DECL_ATTRIBUTES (exp))));
26597 if (!direct_extern_access)
26598 ix86_has_no_direct_extern_access = true;
26599 return default_binds_local_p_3 (exp, flag_shlib != 0, true,
26600 direct_extern_access,
26601 (direct_extern_access
26602 && (!flag_pic
26603 || (TARGET_64BIT
26604 && HAVE_LD_PIE_COPYRELOC != 0))));
26605}
26606
26607/* If flag_pic or ix86_direct_extern_access is false, then neither
26608 local nor global relocs should be placed in readonly memory. */
26609
26610static int
26611ix86_reloc_rw_mask (void)
26612{
26613 return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
26614}
26615#endif
26616
26617/* Return true iff ADDR can be used as a symbolic base address. */
26618
26619static bool
26620symbolic_base_address_p (rtx addr)
26621{
26622 if (GET_CODE (addr) == SYMBOL_REF)
26623 return true;
26624
26625 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
26626 return true;
26627
26628 return false;
26629}
26630
26631/* Return true iff ADDR can be used as a base address. */
26632
26633static bool
26634base_address_p (rtx addr)
26635{
26636 if (REG_P (addr))
26637 return true;
26638
26639 if (symbolic_base_address_p (addr))
26640 return true;
26641
26642 return false;
26643}
26644
26645/* If MEM is in the form of [(base+symbase)+offset], extract the three
26646 parts of address and set to BASE, SYMBASE and OFFSET, otherwise
26647 return false. */
26648
26649static bool
26650extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
26651{
26652 rtx addr;
26653
26654 gcc_assert (MEM_P (mem));
26655
26656 addr = XEXP (mem, 0);
26657
26658 if (GET_CODE (addr) == CONST)
26659 addr = XEXP (addr, 0);
26660
26661 if (base_address_p (addr))
26662 {
26663 *base = addr;
26664 *symbase = const0_rtx;
26665 *offset = const0_rtx;
26666 return true;
26667 }
26668
26669 if (GET_CODE (addr) == PLUS
26670 && base_address_p (XEXP (addr, 0)))
26671 {
26672 rtx addend = XEXP (addr, 1);
26673
26674 if (GET_CODE (addend) == CONST)
26675 addend = XEXP (addend, 0);
26676
26677 if (CONST_INT_P (addend))
26678 {
26679 *base = XEXP (addr, 0);
26680 *symbase = const0_rtx;
26681 *offset = addend;
26682 return true;
26683 }
26684
26685 /* Also accept REG + symbolic ref, with or without a CONST_INT
26686 offset. */
26687 if (REG_P (XEXP (addr, 0)))
26688 {
26689 if (symbolic_base_address_p (addend))
26690 {
26691 *base = XEXP (addr, 0);
26692 *symbase = addend;
26693 *offset = const0_rtx;
26694 return true;
26695 }
26696
26697 if (GET_CODE (addend) == PLUS
26698 && symbolic_base_address_p (XEXP (addend, 0))
26699 && CONST_INT_P (XEXP (addend, 1)))
26700 {
26701 *base = XEXP (addr, 0);
26702 *symbase = XEXP (addend, 0);
26703 *offset = XEXP (addend, 1);
26704 return true;
26705 }
26706 }
26707 }
26708
26709 return false;
26710}
26711
26712/* Given OPERANDS of consecutive load/store, check if we can merge
26713 them into move multiple. LOAD is true if they are load instructions.
26714 MODE is the mode of memory operands. */
26715
26716bool
26717ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
26718 machine_mode mode)
26719{
26720 HOST_WIDE_INT offval_1, offval_2, msize;
26721 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2,
26722 symbase_1, symbase_2, offset_1, offset_2;
26723
26724 if (load)
26725 {
26726 mem_1 = operands[1];
26727 mem_2 = operands[3];
26728 reg_1 = operands[0];
26729 reg_2 = operands[2];
26730 }
26731 else
26732 {
26733 mem_1 = operands[0];
26734 mem_2 = operands[2];
26735 reg_1 = operands[1];
26736 reg_2 = operands[3];
26737 }
26738
26739 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
26740
26741 if (REGNO (reg_1) != REGNO (reg_2))
26742 return false;
26743
26744 /* Check if the addresses are in the form of [base+offset]. */
26745 if (!extract_base_offset_in_addr (mem_1, &base_1, &symbase_1, &offset_1))
26746 return false;
26747 if (!extract_base_offset_in_addr (mem_2, &base_2, &symbase_2, &offset_2))
26748 return false;
26749
26750 /* Check if the bases are the same. */
26751 if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2))
26752 return false;
26753
26754 offval_1 = INTVAL (offset_1);
26755 offval_2 = INTVAL (offset_2);
26756 msize = GET_MODE_SIZE (mode);
26757 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
26758 if (offval_1 + msize != offval_2)
26759 return false;
26760
26761 return true;
26762}
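/* Illustrative example: a DImode pair accessing 8(%rbx) and 16(%rbx) with
   the same value register passes the checks above (same base, offsets
   differing by GET_MODE_SIZE (DImode) == 8); 8(%rbx) and 24(%rbx), or
   bases %rbx and %rcx, would be rejected.  */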
26763
26764/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
26765
26766static bool
26767ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
26768 optimization_type opt_type)
26769{
26770 switch (op)
26771 {
26772 case asin_optab:
26773 case acos_optab:
26774 case log1p_optab:
26775 case exp_optab:
26776 case exp10_optab:
26777 case exp2_optab:
26778 case expm1_optab:
26779 case ldexp_optab:
26780 case scalb_optab:
26781 case round_optab:
26782 case lround_optab:
26783 return opt_type == OPTIMIZE_FOR_SPEED;
26784
26785 case rint_optab:
26786 if (SSE_FLOAT_MODE_P (mode1)
26787 && TARGET_SSE_MATH
26788 && !flag_trapping_math
26789 && !TARGET_SSE4_1
26790 && mode1 != HFmode)
26791 return opt_type == OPTIMIZE_FOR_SPEED;
26792 return true;
26793
26794 case floor_optab:
26795 case ceil_optab:
26796 case btrunc_optab:
26797 if (((SSE_FLOAT_MODE_P (mode1)
26798 && TARGET_SSE_MATH
26799 && TARGET_SSE4_1)
26800 || mode1 == HFmode)
26801 && !flag_trapping_math)
26802 return true;
26803 return opt_type == OPTIMIZE_FOR_SPEED;
26804
26805 case rsqrt_optab:
26806 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
26807
26808 default:
26809 return true;
26810 }
26811}
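/* For example (illustrative): with SSE4.1 and -fno-trapping-math, floor on
   SFmode is reported as supported even when optimizing for size (it expands
   to a single roundss), whereas without SSE4.1 the longer emulation is only
   considered worthwhile when optimizing for speed.  */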
26812
26813/* Address space support.
26814
26815 This is not "far pointers" in the 16-bit sense, but an easy way
26816 to use %fs and %gs segment prefixes. Therefore:
26817
26818 (a) All address spaces have the same modes,
26819 (b) All address spaces have the same address forms,
26820 (c) While %fs and %gs are technically subsets of the generic
26821 address space, they are probably not subsets of each other.
26822 (d) Since we have no access to the segment base register values
26823 without resorting to a system call, we cannot convert a
26824 non-default address space to a default address space.
26825 Therefore we do not claim %fs or %gs are subsets of generic.
26826
26827 Therefore we can (mostly) use the default hooks. */
26828
26829/* All use of segmentation is assumed to make address 0 valid. */
26830
26831static bool
26832ix86_addr_space_zero_address_valid (addr_space_t as)
26833{
26834 return as != ADDR_SPACE_GENERIC;
26835}
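/* Illustrative consequence: something like
     int __seg_gs *p = 0;
     ... = *p;
   is a legitimate access at offset 0 from the %gs base, so such a
   dereference must not be treated as trapping undefined behavior the way a
   generic-address-space null dereference may be.  */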
26836
26837static void
26838ix86_init_libfuncs (void)
26839{
26840 if (TARGET_64BIT)
26841 {
26842 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
26843 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
26844 }
26845 else
26846 {
26847 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
26848 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
26849 }
26850
26851#if TARGET_MACHO
26852 darwin_rename_builtins ();
26853#endif
26854}
26855
26856/* Set the value of FLT_EVAL_METHOD in float.h. When using only the
26857 FPU, assume that the fpcw is set to extended precision; when using
26858 only SSE, rounding is correct; when using both SSE and the FPU,
26859 the rounding precision is indeterminate, since either may be chosen
26860 apparently at random. */
26861
26862static enum flt_eval_method
26863ix86_get_excess_precision (enum excess_precision_type type)
26864{
26865 switch (type)
26866 {
26867 case EXCESS_PRECISION_TYPE_FAST:
26868 /* The fastest type to promote to will always be the native type,
26869 whether that occurs with implicit excess precision or
26870 otherwise. */
26871 return TARGET_AVX512FP16
26872 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
26873 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
26874 case EXCESS_PRECISION_TYPE_STANDARD:
26875 case EXCESS_PRECISION_TYPE_IMPLICIT:
26876 /* Otherwise, the excess precision we want when we are
26877 in a standards compliant mode, and the implicit precision we
26878 provide would be identical were it not for the unpredictable
26879 cases. */
26880 if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
26881 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
26882 else if (!TARGET_80387)
26883 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
26884 else if (!TARGET_MIX_SSE_I387)
26885 {
26886 if (!(TARGET_SSE && TARGET_SSE_MATH))
26887 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
26888 else if (TARGET_SSE2)
26889 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
26890 }
26891
26892 /* If we are in standards compliant mode, but we know we will
26893 calculate in unpredictable precision, return
26894 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
26895 excess precision if the target can't guarantee it will honor
26896 it. */
26897 return (type == EXCESS_PRECISION_TYPE_STANDARD
26898 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
26899 : FLT_EVAL_METHOD_UNPREDICTABLE);
26900 case EXCESS_PRECISION_TYPE_FLOAT16:
26901 if (TARGET_80387
26902 && !(TARGET_SSE_MATH && TARGET_SSE))
26903 error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
26904 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
26905 default:
26906 gcc_unreachable ();
26907 }
26908
26909 return FLT_EVAL_METHOD_UNPREDICTABLE;
26910}
26911
26912/* Return true if _BitInt(N) is supported and fill its details into *INFO. */
26913bool
26914ix86_bitint_type_info (int n, struct bitint_info *info)
26915{
26916 if (n <= 8)
26917 info->limb_mode = QImode;
26918 else if (n <= 16)
26919 info->limb_mode = HImode;
26920 else if (n <= 32 || (!TARGET_64BIT && n > 64))
26921 info->limb_mode = SImode;
26922 else
26923 info->limb_mode = DImode;
26924 info->abi_limb_mode = info->limb_mode;
26925 info->big_endian = false;
26926 info->extended = false;
26927 return true;
26928}
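/* For example (illustrative): _BitInt(128) is laid out with DImode limbs on
   64-bit targets (two limbs) but SImode limbs on 32-bit targets, because of
   the !TARGET_64BIT && n > 64 case above; _BitInt(24) fits in a single
   SImode limb either way.  */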
26929
26930/* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  For TI_LONG_DOUBLE_TYPE
26931 (the long double type) return DFmode, TFmode or XFmode depending on the
26932 long double width; go with the default for the other types.  */
26933
26934static machine_mode
26935ix86_c_mode_for_floating_type (enum tree_index ti)
26936{
26937 if (ti == TI_LONG_DOUBLE_TYPE)
26938 return (TARGET_LONG_DOUBLE_64 ? DFmode
26939 : (TARGET_LONG_DOUBLE_128 ? TFmode : XFmode));
26940 return default_mode_for_floating_type (ti);
26941}
26942
26943/* Returns modified FUNCTION_TYPE for cdtor callabi. */
26944tree
26945ix86_cxx_adjust_cdtor_callabi_fntype (tree fntype)
26946{
26947 if (TARGET_64BIT
26948 || TARGET_RTD
26949 || ix86_function_type_abi (fntype) != MS_ABI)
26950 return fntype;
26951 /* For 32-bit MS ABI add thiscall attribute. */
26952 tree attribs = tree_cons (get_identifier ("thiscall"), NULL_TREE,
26953 TYPE_ATTRIBUTES (fntype));
26954 return build_type_attribute_variant (fntype, attribs);
26955}
26956
26957/* Implement PUSH_ROUNDING.  On 386, we have a pushw instruction that
26958 decrements by exactly 2 no matter what the position was; there is no pushb.
26959
26960 But as CIE data alignment factor on this arch is -4 for 32bit targets
26961 and -8 for 64bit targets, we need to make sure all stack pointer adjustments
26962 are in multiple of 4 for 32bit targets and 8 for 64bit targets. */
26963
26964poly_int64
26965ix86_push_rounding (poly_int64 bytes)
26966{
26967 return ROUND_UP (bytes, UNITS_PER_WORD);
26968}
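/* For example (illustrative): ix86_push_rounding (2) yields 4 for 32-bit
   targets and 8 for 64-bit ones, so a pushed HImode value is accounted as a
   full word and every stack pointer adjustment stays a multiple of the CIE
   data alignment factor mentioned above.  */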
26969
26970/* Use 8 bits of metadata starting from bit 48 for LAM_U48,
26971 and 6 bits of metadata starting from bit 57 for LAM_U57.  */
26972#define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48 \
26973 ? 48 \
26974 : (ix86_lam_type == lam_u57 ? 57 : 0))
26975#define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48 \
26976 ? 8 \
26977 : (ix86_lam_type == lam_u57 ? 6 : 0))
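/* Illustrative LAM_U57 pointer layout (bit 63 is left untouched, see
   ix86_memtag_untagged_pointer below):
     bit 63 | bits 62:57 | bits 56:0
     kept     6-bit tag    untagged address
   so tagging amounts to ptr | ((tag & 0x3f) << 57) and untagging to masking
   those six bits back off.  */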
26978
26979/* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */
26980bool
26981ix86_memtag_can_tag_addresses ()
26982{
26983 return ix86_lam_type != lam_none && TARGET_LP64;
26984}
26985
26986/* Implement TARGET_MEMTAG_TAG_SIZE. */
26987unsigned char
26988ix86_memtag_tag_size ()
26989{
26990 return IX86_HWASAN_TAG_SIZE;
26991}
26992
26993/* Implement TARGET_MEMTAG_SET_TAG. */
26994rtx
26995ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
26996{
26997 /* default_memtag_insert_random_tag may
26998 generate a tag with a value wider than 6 bits.  */
26999 if (ix86_lam_type == lam_u57)
27000 {
27001 unsigned HOST_WIDE_INT and_imm
27002 = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
27003
27004 emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
27005 }
27006 tag = expand_simple_binop (Pmode, ASHIFT, tag,
27007 GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
27008 /* unsignedp = */1, OPTAB_WIDEN);
27009 rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
27010 /* unsignedp = */1, OPTAB_DIRECT);
27011 return ret;
27012}
27013
27014/* Implement TARGET_MEMTAG_EXTRACT_TAG. */
27015rtx
27016ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
27017{
27018 rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
27019 GEN_INT (IX86_HWASAN_SHIFT), target,
27020 /* unsignedp = */0,
27021 OPTAB_DIRECT);
27022 rtx ret = gen_reg_rtx (QImode);
27023 /* Mask off bit63 when LAM_U57. */
27024 if (ix86_lam_type == lam_u57)
27025 {
27026 unsigned HOST_WIDE_INT and_imm
27027 = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
27028 emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
27029 gen_int_mode (and_imm, QImode)));
27030 }
27031 else
27032 emit_move_insn (ret, gen_lowpart (QImode, tag));
27033 return ret;
27034}
27035
27036/* Implement TARGET_MEMTAG_UNTAGGED_POINTER.  */
27037rtx
27038ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
27039{
27040 /* Leave bit63 alone. */
27041 rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
27042 + (HOST_WIDE_INT_1U << 63) - 1),
27043 Pmode);
27044 rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
27045 tag_mask, target, true,
27046 OPTAB_DIRECT);
27047 gcc_assert (untagged_base);
27048 return untagged_base;
27049}
27050
27051/* Implement TARGET_MEMTAG_ADD_TAG. */
27052rtx
27053ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
27054{
27055 rtx base_tag = gen_reg_rtx (QImode);
27056 rtx base_addr = gen_reg_rtx (Pmode);
27057 rtx tagged_addr = gen_reg_rtx (Pmode);
27058 rtx new_tag = gen_reg_rtx (QImode);
27059 unsigned HOST_WIDE_INT and_imm
27060 = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;
27061
27062 /* When the tag addition "overflows", the most significant bit
27063 needs to be masked off.  */
27064 emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
27065 emit_move_insn (base_addr,
27066 ix86_memtag_untagged_pointer (base, NULL_RTX));
27067 emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
27068 emit_move_insn (new_tag, base_tag);
27069 emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
27070 emit_move_insn (tagged_addr,
27071 ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
27072 return plus_constant (Pmode, tagged_addr, offset);
27073}
27074
27075/* Implement TARGET_HAVE_CCMP. */
27076static bool
27077ix86_have_ccmp ()
27078{
27079 return (bool) TARGET_APX_CCMP;
27080}
27081
27082/* Implement TARGET_MODE_CAN_TRANSFER_BITS. */
27083static bool
27084ix86_mode_can_transfer_bits (machine_mode mode)
27085{
27086 if (GET_MODE_CLASS (mode) == MODE_FLOAT
27087 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
27088 switch (GET_MODE_INNER (mode))
27089 {
27090 case E_SFmode:
27091 case E_DFmode:
27092 /* These suffer from normalization upon load when not using SSE. */
27093 return !(ix86_fpmath & FPMATH_387);
27094 default:
27095 return true;
27096 }
27097
27098 return true;
27099}
27100
27101/* Implement TARGET_REDZONE_CLOBBER. */
27102static rtx
27103ix86_redzone_clobber ()
27104{
27105 cfun->machine->asm_redzone_clobber_seen = true;
27106 if (ix86_using_red_zone ())
27107 {
27108 rtx base = plus_constant (Pmode, stack_pointer_rtx, -RED_ZONE_SIZE);
27109 rtx mem = gen_rtx_MEM (BLKmode, base);
27110 set_mem_size (mem, RED_ZONE_SIZE);
27111 return mem;
27112 }
27113 return NULL_RTX;
27114}
27115
27116/* Target-specific selftests. */
27117
27118#if CHECKING_P
27119
27120namespace selftest {
27121
27122/* Verify that hard regs are dumped as expected (in compact mode). */
27123
27124static void
27125ix86_test_dumping_hard_regs ()
27126{
27127 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
27128 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
27129}
27130
27131/* Test dumping an insn with repeated references to the same SCRATCH,
27132 to verify the rtx_reuse code. */
27133
27134static void
27135ix86_test_dumping_memory_blockage ()
27136{
27137 set_new_first_and_last_insn (NULL, NULL);
27138
27139 rtx pat = gen_memory_blockage ();
27140 rtx_reuse_manager r;
27141 r.preprocess (pat);
27142
27143 /* Verify that the repeated references to the SCRATCH are dumped
27144 using reuse IDs: the first should be prefixed with a reuse ID,
27145 and the second should be dumped as a "reuse_rtx" of that ID.
27146 The expected string assumes Pmode == DImode. */
27147 if (Pmode == DImode)
27148 ASSERT_RTL_DUMP_EQ_WITH_REUSE
27149 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
27150 " (unspec:BLK [\n"
27151 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
27152 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
27153}
27154
27155/* Verify loading an RTL dump; specifically a dump of copying
27156 a param on x86_64 from a hard reg into the frame.
27157 This test is target-specific since the dump contains target-specific
27158 hard reg names. */
27159
27160static void
27161ix86_test_loading_dump_fragment_1 ()
27162{
27163 rtl_dump_test t (SELFTEST_LOCATION,
27164 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
27165
27166 rtx_insn *insn = get_insn_by_uid (1);
27167
27168 /* The block structure and indentation here is purely for
27169 readability; it mirrors the structure of the rtx. */
27170 tree mem_expr;
27171 {
27172 rtx pat = PATTERN (insn);
27173 ASSERT_EQ (SET, GET_CODE (pat));
27174 {
27175 rtx dest = SET_DEST (pat);
27176 ASSERT_EQ (MEM, GET_CODE (dest));
27177 /* Verify the "/c" was parsed. */
27178 ASSERT_TRUE (RTX_FLAG (dest, call));
27179 ASSERT_EQ (SImode, GET_MODE (dest));
27180 {
27181 rtx addr = XEXP (dest, 0);
27182 ASSERT_EQ (PLUS, GET_CODE (addr));
27183 ASSERT_EQ (DImode, GET_MODE (addr));
27184 {
27185 rtx lhs = XEXP (addr, 0);
27186 /* Verify that the "frame" REG was consolidated. */
27187 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
27188 }
27189 {
27190 rtx rhs = XEXP (addr, 1);
27191 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
27192 ASSERT_EQ (-4, INTVAL (rhs));
27193 }
27194 }
27195 /* Verify the "[1 i+0 S4 A32]" was parsed. */
27196 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
27197 /* "i" should have been handled by synthesizing a global int
27198 variable named "i". */
27199 mem_expr = MEM_EXPR (dest);
27200 ASSERT_NE (mem_expr, NULL);
27201 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
27202 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
27203 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
27204 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
27205 /* "+0". */
27206 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
27207 ASSERT_EQ (0, MEM_OFFSET (dest));
27208 /* "S4". */
27209 ASSERT_EQ (4, MEM_SIZE (dest));
27210 /* "A32".  */
27211 ASSERT_EQ (32, MEM_ALIGN (dest));
27212 }
27213 {
27214 rtx src = SET_SRC (pat);
27215 ASSERT_EQ (REG, GET_CODE (src));
27216 ASSERT_EQ (SImode, GET_MODE (src));
27217 ASSERT_EQ (5, REGNO (src));
27218 tree reg_expr = REG_EXPR (src);
27219 /* "i" here should point to the same var as for the MEM_EXPR. */
27220 ASSERT_EQ (reg_expr, mem_expr);
27221 }
27222 }
27223}
27224
27225/* Verify that the RTL loader copes with a call_insn dump.
27226 This test is target-specific since the dump contains a target-specific
27227 hard reg name. */
27228
27229static void
27230ix86_test_loading_call_insn ()
27231{
27232 /* The test dump includes register "xmm0", which requires TARGET_SSE
27233 to exist.  */
27234 if (!TARGET_SSE)
27235 return;
27236
27237 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
27238
27239 rtx_insn *insn = get_insns ();
27240 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
27241
27242 /* "/j". */
27243 ASSERT_TRUE (RTX_FLAG (insn, jump));
27244
27245 rtx pat = PATTERN (insn);
27246 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
27247
27248 /* Verify REG_NOTES. */
27249 {
27250 /* "(expr_list:REG_CALL_DECL". */
27251 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
27252 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
27253 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
27254
27255 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
27256 rtx_expr_list *note1 = note0->next ();
27257 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
27258
27259 ASSERT_EQ (NULL, note1->next ());
27260 }
27261
27262 /* Verify CALL_INSN_FUNCTION_USAGE. */
27263 {
27264 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
27265 rtx_expr_list *usage
27266 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
27267 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
27268 ASSERT_EQ (DFmode, GET_MODE (usage));
27269 ASSERT_EQ (USE, GET_CODE (usage->element ()));
27270 ASSERT_EQ (NULL, usage->next ());
27271 }
27272}
27273
27274/* Verify that the RTL loader copes with a dump from print_rtx_function.
27275 This test is target-specific since the dump contains target-specific
27276 hard reg names. */
27277
27278static void
27279ix86_test_loading_full_dump ()
27280{
27281 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
27282
27283 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
27284
27285 rtx_insn *insn_1 = get_insn_by_uid (1);
27286 ASSERT_EQ (NOTE, GET_CODE (insn_1));
27287
27288 rtx_insn *insn_7 = get_insn_by_uid (7);
27289 ASSERT_EQ (INSN, GET_CODE (insn_7));
27290 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
27291
27292 rtx_insn *insn_15 = get_insn_by_uid (15);
27293 ASSERT_EQ (INSN, GET_CODE (insn_15));
27294 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
27295
27296 /* Verify crtl->return_rtx. */
27297 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
27298 ASSERT_EQ (0, REGNO (crtl->return_rtx));
27299 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
27300}
27301
27302/* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
27303 In particular, verify that it correctly loads the 2nd operand.
27304 This test is target-specific since these are machine-specific
27305 operands (and enums). */
27306
27307static void
27308ix86_test_loading_unspec ()
27309{
27310 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
27311
27312 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
27313
27314 ASSERT_TRUE (cfun);
27315
27316 /* Test of an UNSPEC. */
27317 rtx_insn *insn = get_insns ();
27318 ASSERT_EQ (INSN, GET_CODE (insn));
27319 rtx set = single_set (insn);
27320 ASSERT_NE (NULL, set);
27321 rtx dst = SET_DEST (set);
27322 ASSERT_EQ (MEM, GET_CODE (dst));
27323 rtx src = SET_SRC (set);
27324 ASSERT_EQ (UNSPEC, GET_CODE (src));
27325 ASSERT_EQ (BLKmode, GET_MODE (src));
27326 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
27327
27328 rtx v0 = XVECEXP (src, 0, 0);
27329
27330 /* Verify that the two uses of the first SCRATCH have pointer
27331 equality. */
27332 rtx scratch_a = XEXP (dst, 0);
27333 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
27334
27335 rtx scratch_b = XEXP (v0, 0);
27336 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
27337
27338 ASSERT_EQ (scratch_a, scratch_b);
27339
27340 /* Verify that the two mems are thus treated as equal. */
27341 ASSERT_TRUE (rtx_equal_p (dst, v0));
27342
27343 /* Verify that the insn is recognized. */
27344 ASSERT_NE(-1, recog_memoized (insn));
27345
27346 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
27347 insn = NEXT_INSN (insn);
27348 ASSERT_EQ (INSN, GET_CODE (insn));
27349
27350 set = single_set (insn);
27351 ASSERT_NE (NULL, set);
27352
27353 src = SET_SRC (set);
27354 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
27355 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
27356}
27357
27358/* Run all target-specific selftests. */
27359
27360static void
27361ix86_run_selftests (void)
27362{
27363 ix86_test_dumping_hard_regs ();
27364 ix86_test_dumping_memory_blockage ();
27365
27366 /* Various tests of loading RTL dumps, here because they contain
27367 ix86-isms (e.g. names of hard regs). */
27368 ix86_test_loading_dump_fragment_1 ();
27369 ix86_test_loading_call_insn ();
27370 ix86_test_loading_full_dump ();
27371 ix86_test_loading_unspec ();
27372}
27373
27374} // namespace selftest
27375
27376#endif /* CHECKING_P */
27377
static const scoped_attribute_specs *const ix86_attribute_table[] =
{
  &ix86_gnu_attribute_table
};

/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ix86_invalid_conversion

#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_VECTORIZE_BUILTIN_SCATTER
#define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
  ix86_print_patchable_function_entry

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#else
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P ix86_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_INSN_COST
#define TARGET_INSN_COST ix86_insn_cost
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_OVERLAP_OP_BY_PIECES_P
#define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM FLAGS_REG
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
#undef TARGET_C_BITINT_TYPE_INFO
#define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
#undef TARGET_C_MODE_FOR_FLOATING_TYPE
#define TARGET_C_MODE_FOR_FLOATING_TYPE ix86_c_mode_for_floating_type
#undef TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE
#define TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE ix86_cxx_adjust_cdtor_callabi_fntype
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_PUSH_ARGUMENT
#define TARGET_PUSH_ARGUMENT ix86_push_argument
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN ix86_warn_func_return

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_EMIT_SUPPORT_TINFOS
#define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard

#undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P
#define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \
  ix86_stack_protect_runtime_enabled_p

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_ZERO_CALL_USED_REGS
#define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
/* When this hook returns true for MODE, the compiler allows
   registers explicitly used in the rtl to be used as spill registers
   but prevents the compiler from extending the lifetime of these
   registers.  */
#undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
#undef TARGET_CALLEE_SAVE_COST
#define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_SPLIT_REDUCTION
#define TARGET_VECTORIZE_SPLIT_REDUCTION \
  ix86_split_reduction
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  ix86_autovectorize_vector_modes
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable

#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
#define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST

#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc

#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost

#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  ix86_hard_regno_call_part_clobbered

#undef TARGET_INSN_CALLEE_ABI
#define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class

#undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
#define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment

#undef TARGET_STATIC_RTX_ALIGNMENT
#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment

#undef TARGET_EMPTY_RECORD_P
#define TARGET_EMPTY_RECORD_P ix86_is_empty_record

#undef TARGET_WARN_PARAMETER_PASSING_ABI
#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi

#undef TARGET_GET_MULTILIB_ABI_NAME
#define TARGET_GET_MULTILIB_ABI_NAME \
  ix86_get_multilib_abi_name

#undef TARGET_IFUNC_REF_LOCAL_OK
#define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok

#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_ASM_RELOC_RW_MASK
# define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
#endif

#undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
#define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses

#undef TARGET_MEMTAG_ADD_TAG
#define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag

#undef TARGET_MEMTAG_SET_TAG
#define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag

#undef TARGET_MEMTAG_EXTRACT_TAG
#define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag

#undef TARGET_MEMTAG_UNTAGGED_POINTER
#define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer

#undef TARGET_MEMTAG_TAG_SIZE
#define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size

#undef TARGET_GEN_CCMP_FIRST
#define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first

#undef TARGET_GEN_CCMP_NEXT
#define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next

#undef TARGET_HAVE_CCMP
#define TARGET_HAVE_CCMP ix86_have_ccmp

#undef TARGET_MODE_CAN_TRANSFER_BITS
#define TARGET_MODE_CAN_TRANSFER_BITS ix86_mode_can_transfer_bits

#undef TARGET_REDZONE_CLOBBER
#define TARGET_REDZONE_CLOBBER ix86_redzone_clobber

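/* Implement the TARGET_LIBC_HAS_FAST_FUNCTION hook: when building against
   glibc, mempcpy is known to have a fast implementation.  */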
static bool
ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
{
#ifdef OPTION_GLIBC
  if (OPTION_GLIBC)
    return (built_in_function) fcode == BUILT_IN_MEMPCPY;
  else
    return false;
#else
  return false;
#endif
}

#undef TARGET_LIBC_HAS_FAST_FUNCTION
#define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function

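/* Implement the TARGET_LIBM_FUNCTION_MAX_ERROR hook: return the maximum
   error, in ulps, that the target libm (and, when the fast-math flags allow
   vectorized calls, libmvec) is known to have for function CFN in MODE.  */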
static unsigned
ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
                              bool boundary_p)
{
#ifdef OPTION_GLIBC
  bool glibc_p = OPTION_GLIBC;
#else
  bool glibc_p = false;
#endif
  if (glibc_p)
    {
      /* If __FAST_MATH__ is defined, glibc provides libmvec.  */
      unsigned int libmvec_ret = 0;
      if (!flag_trapping_math
          && flag_unsafe_math_optimizations
          && flag_finite_math_only
          && !flag_signed_zeros
          && !flag_errno_math)
        switch (cfn)
          {
          CASE_CFN_COS:
          CASE_CFN_COS_FN:
          CASE_CFN_SIN:
          CASE_CFN_SIN_FN:
            if (!boundary_p)
              {
                /* With non-default rounding modes, libmvec provides
                   complete garbage in results.  E.g.
                   _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
                   returns 0.00333309174f rather than 1.40129846e-45f.  */
                if (flag_rounding_math)
                  return ~0U;
                /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
                   claims the libmvec maximum error is 4 ulps.  Random testing
                   indicates about 2 ulps for SFmode and 0.5 ulps for DFmode,
                   but go with the documented 4 ulps.  */
                libmvec_ret = 4;
              }
            break;
          default:
            break;
          }
      unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
                                                              boundary_p);
      return MAX (ret, libmvec_ret);
    }
  return default_libm_function_max_error (cfn, mode, boundary_p);
}

#undef TARGET_LIBM_FUNCTION_MAX_ERROR
#define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error

#if TARGET_MACHO
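/* Implement the TARGET_CANNOT_COPY_INSN_P hook for 32-bit Mach-O: the insn
   that loads the PIC base (an UNSPEC_SET_GOT) must not be duplicated.  */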
static bool
ix86_cannot_copy_insn_p (rtx_insn *insn)
{
  if (TARGET_64BIT)
    return false;

  rtx set = single_set (insn);
  if (set)
    {
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == UNSPEC
          && XINT (src, 1) == UNSPEC_SET_GOT)
        return true;
    }
  return false;
}

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P ix86_cannot_copy_insn_p

#endif

#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* #if CHECKING_P */

#undef TARGET_DOCUMENTATION_NAME
#define TARGET_DOCUMENTATION_NAME "x86"

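/* Build the target hook vector from the TARGET_* macros defined above.  */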
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"
