X86Subtarget.cpp source code [llvm/lib/Target/X86/X86Subtarget.cpp]

1	//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the X86 specific subclass of TargetSubtargetInfo.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "X86Subtarget.h"
14	#include "GISel/X86CallLowering.h"
15	#include "GISel/X86LegalizerInfo.h"
16	#include "GISel/X86RegisterBankInfo.h"
17	#include "MCTargetDesc/X86BaseInfo.h"
18	#include "X86.h"
19	#include "X86MacroFusion.h"
20	#include "X86TargetMachine.h"
21	#include "llvm/CodeGen/GlobalISel/CallLowering.h"
22	#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
23	#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
24	#include "llvm/CodeGen/ScheduleDAGMutation.h"
25	#include "llvm/IR/Attributes.h"
26	#include "llvm/IR/ConstantRange.h"
27	#include "llvm/IR/Function.h"
28	#include "llvm/IR/GlobalValue.h"
29	#include "llvm/Support/Casting.h"
30	#include "llvm/Support/CodeGen.h"
31	#include "llvm/Support/CommandLine.h"
32	#include "llvm/Support/Debug.h"
33	#include "llvm/Support/ErrorHandling.h"
34	#include "llvm/Support/raw_ostream.h"
35	#include "llvm/Target/TargetMachine.h"
36	#include "llvm/TargetParser/Triple.h"
37
38	#if defined(_MSC_VER)
39	#include <intrin.h>
40	#endif
41
42	using namespace llvm;
43
44	#define DEBUG_TYPE "subtarget"
45
46	#define GET_SUBTARGETINFO_TARGET_DESC
47	#define GET_SUBTARGETINFO_CTOR
48	#include "X86GenSubtargetInfo.inc"
49
50	// Temporary option to control early if-conversion for x86 while adding machine
51	// models.
52	static cl::opt<bool>
53	X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
54	cl::desc ("Enable early if-conversion on X86"));
55
56
57	/// Classify a blockaddress reference for the current subtarget according to how
58	/// we should reference it in a non-pcrel context.
59	unsigned char X86Subtarget::classifyBlockAddressReference() const {
60	return classifyLocalReference(GV: nullptr);
61	}
62
63	/// Classify a global variable reference for the current subtarget according to
64	/// how we should reference it in a non-pcrel context.
65	unsigned char
66	X86Subtarget::classifyGlobalReference(const GlobalValue GV) const* {
67	return classifyGlobalReference(GV, M: *GV->getParent());
68	}
69
70	unsigned char
71	X86Subtarget::classifyLocalReference(const GlobalValue GV) const* {
72	CodeModel::Model CM = TM.getCodeModel();
73	// Tagged globals have non-zero upper bits, which makes direct references
74	// require a 64-bit immediate. With the small/medium code models this causes
75	// relocation errors, so we go through the GOT instead.
76	if (AllowTaggedGlobals && CM != CodeModel::Large && GV && !isa<Function>(GV))
77	return X86II::MO_GOTPCREL_NORELAX;
78
79	// If we're not PIC, it's not very interesting.
80	if (!isPositionIndependent())
81	return X86II::MO_NO_FLAG;
82
83	if (is64Bit()) {
84	// 64-bit ELF PIC local references may use GOTOFF relocations.
85	if (isTargetELF()) {
86	assert(CM != CodeModel::Tiny &&
87	"Tiny codesize model not supported on X86");
88	// In the large code model, all text is far from any global data, so we
89	// use GOTOFF.
90	if (CM == CodeModel::Large)
91	return X86II::MO_GOTOFF;
92	// Large GlobalValues use GOTOFF, otherwise use RIP-rel access.
93	if (GV)
94	return TM.isLargeGlobalValue(GV) ? X86II::MO_GOTOFF : X86II::MO_NO_FLAG;
95	// GV == nullptr is for all other non-GlobalValue global data like the
96	// constant pool, jump tables, labels, etc. The small and medium code
97	// models treat these as accessible with a RIP-rel access.
98	return X86II::MO_NO_FLAG;
99	}
100
101	// Otherwise, this is either a RIP-relative reference or a 64-bit movabsq,
102	// both of which use MO_NO_FLAG.
103	return X86II::MO_NO_FLAG;
104	}
105
106	// The COFF dynamic linker just patches the executable sections.
107	if (isTargetCOFF())
108	return X86II::MO_NO_FLAG;
109
110	if (isTargetDarwin()) {
111	// 32 bit macho has no relocation for a-b if a is undefined, even if
112	// b is in the section that is being relocated.
113	// This means we have to use o load even for GVs that are known to be
114	// local to the dso.
115	if (GV && (GV->isDeclarationForLinker() \|\| GV->hasCommonLinkage()))
116	return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
117
118	return X86II::MO_PIC_BASE_OFFSET;
119	}
120
121	return X86II::MO_GOTOFF;
122	}
123
124	unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
125	const Module &M) const {
126	// The static large model never uses stubs.
127	if (TM.getCodeModel() == CodeModel::Large && !isPositionIndependent())
128	return X86II::MO_NO_FLAG;
129
130	// Absolute symbols can be referenced directly.
131	if (GV) {
132	if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) {
133	// See if we can use the 8-bit immediate form. Note that some instructions
134	// will sign extend the immediate operand, so to be conservative we only
135	// accept the range [0,128).
136	if (CR ->getUnsignedMax().ult(RHS: `128`))
137	return X86II::MO_ABS8;
138	else
139	return X86II::MO_NO_FLAG;
140	}
141	}
142
143	if (TM.shouldAssumeDSOLocal(GV))
144	return classifyLocalReference(GV);
145
146	if (isTargetCOFF()) {
147	// ExternalSymbolSDNode like _tls_index.
148	if (!GV)
149	return X86II::MO_NO_FLAG;
150	if (GV->hasDLLImportStorageClass())
151	return X86II::MO_DLLIMPORT;
152	return X86II::MO_COFFSTUB;
153	}
154	// Some JIT users use -win32-elf triples; these shouldn't use GOT tables.*
155	if (isOSWindows())
156	return X86II::MO_NO_FLAG;
157
158	if (is64Bit()) {
159	// ELF supports a large, truly PIC code model with non-PC relative GOT
160	// references. Other object file formats do not. Use the no-flag, 64-bit
161	// reference for them.
162	if (TM.getCodeModel() == CodeModel::Large)
163	return isTargetELF() ? X86II::MO_GOT : X86II::MO_NO_FLAG;
164	// Tagged globals have non-zero upper bits, which makes direct references
165	// require a 64-bit immediate. So we can't let the linker relax the
166	// relocation to a 32-bit RIP-relative direct reference.
167	if (AllowTaggedGlobals && GV && !isa<Function>(GV))
168	return X86II::MO_GOTPCREL_NORELAX;
169	return X86II::MO_GOTPCREL;
170	}
171
172	if (isTargetDarwin()) {
173	if (!isPositionIndependent())
174	return X86II::MO_DARWIN_NONLAZY;
175	return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
176	}
177
178	// 32-bit ELF references GlobalAddress directly in static relocation model.
179	// We cannot use MO_GOT because EBX may not be set up.
180	if (TM.getRelocationModel() == Reloc::Static)
181	return X86II::MO_NO_FLAG;
182	return X86II::MO_GOT;
183	}
184
185	unsigned char
186	X86Subtarget::classifyGlobalFunctionReference(const GlobalValue GV) const* {
187	return classifyGlobalFunctionReference(GV, M: *GV->getParent());
188	}
189
190	unsigned char
191	X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
192	const Module &M) const {
193	if (TM.shouldAssumeDSOLocal(GV))
194	return X86II::MO_NO_FLAG;
195
196	// Functions on COFF can be non-DSO local for three reasons:
197	// - They are intrinsic functions (!GV)
198	// - They are marked dllimport
199	// - They are extern_weak, and a stub is needed
200	if (isTargetCOFF()) {
201	if (!GV)
202	return X86II::MO_NO_FLAG;
203	if (GV->hasDLLImportStorageClass())
204	return X86II::MO_DLLIMPORT;
205	return X86II::MO_COFFSTUB;
206	}
207
208	const Function *F = dyn_cast_or_null<Function>(Val: GV);
209
210	if (isTargetELF()) {
211	if (is64Bit() && F && (CallingConv::X86_RegCall == F->getCallingConv()))
212	// According to psABI, PLT stub clobbers XMM8-XMM15.
213	// In Regcall calling convention those registers are used for passing
214	// parameters. Thus we need to prevent lazy binding in Regcall.
215	return X86II::MO_GOTPCREL;
216	// If PLT must be avoided then the call should be via GOTPCREL.
217	if (((F && F->hasFnAttribute(Attribute::NonLazyBind)) \|\|
218	(!F && M.getRtLibUseGOT())) &&
219	is64Bit())
220	return X86II::MO_GOTPCREL;
221	// Reference ExternalSymbol directly in static relocation model.
222	if (!is64Bit() && !GV && TM.getRelocationModel() == Reloc::Static)
223	return X86II::MO_NO_FLAG;
224	return X86II::MO_PLT;
225	}
226
227	if (is64Bit()) {
228	if (F && F->hasFnAttribute(Attribute::NonLazyBind))
229	// If the function is marked as non-lazy, generate an indirect call
230	// which loads from the GOT directly. This avoids runtime overhead
231	// at the cost of eager binding (and one extra byte of encoding).
232	return X86II::MO_GOTPCREL;
233	return X86II::MO_NO_FLAG;
234	}
235
236	return X86II::MO_NO_FLAG;
237	}
238
239	/// Return true if the subtarget allows calls to immediate address.
240	bool X86Subtarget::isLegalToCallImmediateAddr() const {
241	// FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32
242	// but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does,
243	// the following check for Win32 should be removed.
244	if (Is64Bit \|\| isTargetWin32())
245	return false;
246	return isTargetELF() \|\| TM.getRelocationModel() == Reloc::Static;
247	}
248
249	void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
250	StringRef FS) {
251	if (CPU.empty())
252	CPU = "generic";
253
254	if (TuneCPU.empty())
255	TuneCPU = "i586"; // FIXME: "generic" is more modern than llc tests expect.
256
257	std::string FullFS = X86_MC::ParseX86Triple(TT: TargetTriple);
258	assert(!FullFS.empty() && "Failed to parse X86 triple");
259
260	if (!FS.empty())
261	FullFS = (Twine (FullFS) + "," + FS).str();
262
263	// Attach EVEX512 feature when we have AVX512 features with a default CPU.
264	// "pentium4" is default CPU for 32-bit targets.
265	// "x86-64" is default CPU for 64-bit targets.
266	if (CPU == "generic" \|\| CPU == "pentium4" \|\| CPU == "x86-64") {
267	size_t posNoEVEX512 = FS.rfind(Str: "-evex512");
268	// Make sure we won't be cheated by "-avx512fp16".
269	size_t posNoAVX512F =
270	FS.ends_with(Suffix: "-avx512f") ? FS.size() - `8` : FS.rfind(Str: "-avx512f,");
271	size_t posEVEX512 = FS.rfind(Str: "+evex512");
272	// Any AVX512XXX will enable AVX512F.
273	size_t posAVX512F = FS.rfind(Str: "+avx512");
274
275	if (posAVX512F != StringRef::npos &&
276	(posNoAVX512F == StringRef::npos \|\| posNoAVX512F < posAVX512F))
277	if (posEVEX512 == StringRef::npos && posNoEVEX512 == StringRef::npos)
278	FullFS += ",+evex512";
279	}
280
281	// Parse features string and set the CPU.
282	ParseSubtargetFeatures(CPU, TuneCPU, FS: FullFS);
283
284	// All CPUs that implement SSE4.2 or SSE4A support unaligned accesses of
285	// 16-bytes and under that are reasonably fast. These features were
286	// introduced with Intel's Nehalem/Silvermont and AMD's Family10h
287	// micro-architectures respectively.
288	if (hasSSE42() \|\| hasSSE4A())
289	IsUnalignedMem16Slow = false;
290
291	LLVM_DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
292	<< ", 3DNowLevel " << X863DNowLevel << ", 64bit "
293	<< HasX86_64 << "\n");
294	if (Is64Bit && !HasX86_64)
295	report_fatal_error(reason: "64-bit code requested on a subtarget that doesn't "
296	"support it!");
297
298	// Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD, NaCl, and for all
299	// 64-bit targets. On Solaris (32-bit), stack alignment is 4 bytes
300	// following the i386 psABI, while on Illumos it is always 16 bytes.
301	if (StackAlignOverride)
302	stackAlignment = *StackAlignOverride;
303	else if (isTargetDarwin() \|\| isTargetLinux() \|\| isTargetKFreeBSD() \|\|
304	isTargetNaCl() \|\| Is64Bit)
305	stackAlignment = Align (`16`);
306
307	// Consume the vector width attribute or apply any target specific limit.
308	if (PreferVectorWidthOverride)
309	PreferVectorWidth = PreferVectorWidthOverride;
310	else if (Prefer128Bit)
311	PreferVectorWidth = `128`;
312	else if (Prefer256Bit)
313	PreferVectorWidth = `256`;
314	}
315
316	X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
317	StringRef TuneCPU,
318	StringRef FS) {
319	initSubtargetFeatures(CPU, TuneCPU, FS);
320	return *this;
321	}
322
323	X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
324	StringRef FS, const X86TargetMachine &TM,
325	MaybeAlign StackAlignOverride,
326	unsigned PreferVectorWidthOverride,
327	unsigned RequiredVectorWidth)
328	: X86GenSubtargetInfo(TT, CPU, TuneCPU, FS),
329	PICStyle(PICStyles::Style::None), TM(TM), TargetTriple (TT),
330	StackAlignOverride (StackAlignOverride),
331	PreferVectorWidthOverride(PreferVectorWidthOverride),
332	RequiredVectorWidth(RequiredVectorWidth),
333	InstrInfo (initializeSubtargetDependencies(CPU, TuneCPU, FS)),
334	TLInfo (TM, *this), FrameLowering (*this, getStackAlignment()) {
335	// Determine the PICStyle based on the target selected.
336	if (!isPositionIndependent() \|\| TM.getCodeModel() == CodeModel::Large)
337	// With the large code model, None forces all memory accesses to be indirect
338	// rather than RIP-relative.
339	setPICStyle(PICStyles::Style::None);
340	else if (is64Bit())
341	setPICStyle(PICStyles::Style::RIPRel);
342	else if (isTargetCOFF())
343	setPICStyle(PICStyles::Style::None);
344	else if (isTargetDarwin())
345	setPICStyle(PICStyles::Style::StubPIC);
346	else if (isTargetELF())
347	setPICStyle(PICStyles::Style::GOT);
348
349	CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering()));
350	Legalizer.reset(new X86LegalizerInfo(*this, TM));
351
352	auto RBI = new* X86RegisterBankInfo(*getRegisterInfo());
353	RegBankInfo.reset(RBI);
354	InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI));
355	}
356
357	const CallLowering X86Subtarget::getCallLowering() const* {
358	return CallLoweringInfo.get();
359	}
360
361	InstructionSelector X86Subtarget::getInstructionSelector() const* {
362	return InstSelector.get();
363	}
364
365	const LegalizerInfo X86Subtarget::getLegalizerInfo() const* {
366	return Legalizer.get();
367	}
368
369	const RegisterBankInfo X86Subtarget::getRegBankInfo() const* {
370	return RegBankInfo.get();
371	}
372
373	bool X86Subtarget::enableEarlyIfConversion() const {
374	return canUseCMOV() && X86EarlyIfConv;
375	}
376
377	void X86Subtarget::getPostRAMutations(
378	std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
379	Mutations.push_back(x: createX86MacroFusionDAGMutation());
380	}
381
382	bool X86Subtarget::isPositionIndependent() const {
383	return TM.isPositionIndependent();
384	}
385

source code of llvm/lib/Target/X86/X86Subtarget.cpp