X86CompressEVEX.cpp source code [llvm/lib/Target/X86/X86CompressEVEX.cpp]

1	//===- X86CompressEVEX.cpp ------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass compresses instructions from EVEX space to legacy/VEX/EVEX space
10	// when possible in order to reduce code size or facilitate HW decoding.
11	//
12	// Possible compression:
13	// a. AVX512 instruction (EVEX) -> AVX instruction (VEX)
14	// b. Promoted instruction (EVEX) -> pre-promotion instruction (legacy/VEX)
15	// c. NDD (EVEX) -> non-NDD (legacy)
16	// d. NF_ND (EVEX) -> NF (EVEX)
17	//
18	// Compression a, b and c can always reduce code size, with some exceptions
19	// such as promoted 16-bit CRC32 which is as long as the legacy version.
20	//
21	// legacy:
22	// crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
23	// promoted:
24	// crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
25	//
26	// From a performance perspective, these should be the same (same uops and
27	// same EXE ports). From an FMV perspective, the older legacy encoding is
28	// preferred because it can execute in more places (broader HW install base).
29	// So we will still do the compression.
30	//
31	// Compression d can help hardware decode (HW may skip reading the NDD
32	// register) although the instruction length remains unchanged.
33	//===----------------------------------------------------------------------===//
34
35	#include "MCTargetDesc/X86BaseInfo.h"
36	#include "MCTargetDesc/X86InstComments.h"
37	#include "X86.h"
38	#include "X86InstrInfo.h"
39	#include "X86Subtarget.h"
40	#include "llvm/ADT/StringRef.h"
41	#include "llvm/CodeGen/MachineFunction.h"
42	#include "llvm/CodeGen/MachineFunctionPass.h"
43	#include "llvm/CodeGen/MachineInstr.h"
44	#include "llvm/CodeGen/MachineOperand.h"
45	#include "llvm/MC/MCInstrDesc.h"
46	#include "llvm/Pass.h"
47	#include <atomic>
48	#include <cassert>
49	#include <cstdint>
50
51	using namespace llvm;
52
53	// Including the generated EVEX compression tables.
/// One row of the EVEX compression table: pairs an original opcode (OldOpc)
/// with the opcode that replaces it (NewOpc). Both comparison operators look
/// at OldOpc only, so a table of these rows can be kept sorted by OldOpc and
/// searched with a raw opcode value.
struct X86CompressEVEXTableEntry {
  uint16_t OldOpc;
  uint16_t NewOpc;

  /// Orders two rows by their original opcode; NewOpc is ignored.
  bool operator<(const X86CompressEVEXTableEntry &Other) const {
    return OldOpc < Other.OldOpc;
  }

  /// Compares a row's original opcode against a bare opcode value.
  friend bool operator<(const X86CompressEVEXTableEntry &Entry,
                        unsigned Opcode) {
    return Entry.OldOpc < Opcode;
  }
};
66	#include "X86GenCompressEVEXTables.inc"
67
// Short name of the pass; also used as the debug type below.
#define COMP_EVEX_NAME "x86-compress-evex"
// Human-readable description of the pass, returned by getPassName().
#define COMP_EVEX_DESC "Compressing EVEX instrs when possible"

// The debug type of this file is the pass name.
#define DEBUG_TYPE COMP_EVEX_NAME
72
73	namespace {
74
75	class CompressEVEXPass : public MachineFunctionPass {
76	public:
77	static char ID;
78	CompressEVEXPass() : MachineFunctionPass (ID) {}
79	StringRef getPassName() const override { return COMP_EVEX_DESC; }
80
81	bool runOnMachineFunction(MachineFunction &MF) override;
82
83	// This pass runs after regalloc and doesn't support VReg operands.
84	MachineFunctionProperties getRequiredProperties() const override {
85	return MachineFunctionProperties ().set(
86	MachineFunctionProperties::Property::NoVRegs);
87	}
88	};
89
90	} // end anonymous namespace
91
92	char CompressEVEXPass::ID = `0`;
93
94	static bool usesExtendedRegister(const MachineInstr &MI) {
95	auto isHiRegIdx = [](unsigned Reg) {
96	// Check for XMM register with indexes between 16 - 31.
97	if (Reg >= X86::XMM16 && Reg <= X86::XMM31)
98	return true;
99	// Check for YMM register with indexes between 16 - 31.
100	if (Reg >= X86::YMM16 && Reg <= X86::YMM31)
101	return true;
102	// Check for GPR with indexes between 16 - 31.
103	if (X86II::isApxExtendedReg(RegNo: Reg))
104	return true;
105	return false;
106	};
107
108	// Check that operands are not ZMM regs or
109	// XMM/YMM regs with hi indexes between 16 - 31.
110	for (const MachineOperand &MO : MI.explicit_operands()) {
111	if (!MO.isReg())
112	continue;
113
114	Register Reg = MO.getReg();
115	assert(!X86II::isZMMReg(Reg) &&
116	"ZMM instructions should not be in the EVEX->VEX tables");
117	if (isHiRegIdx (Reg))
118	return true;
119	}
120
121	return false;
122	}
123
124	// Do any custom cleanup needed to finalize the conversion.
125	static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
126	(void)NewOpc;
127	unsigned Opc = MI.getOpcode();
128	switch (Opc) {
129	case X86::VALIGNDZ128rri:
130	case X86::VALIGNDZ128rmi:
131	case X86::VALIGNQZ128rri:
132	case X86::VALIGNQZ128rmi: {
133	assert((NewOpc == X86::VPALIGNRrri \|\| NewOpc == X86::VPALIGNRrmi) &&
134	"Unexpected new opcode!");
135	unsigned Scale =
136	(Opc == X86::VALIGNQZ128rri \|\| Opc == X86::VALIGNQZ128rmi) ? `8` : `4`;
137	MachineOperand &Imm = MI.getOperand(i: MI.getNumExplicitOperands() - `1`);
138	Imm.setImm(Imm.getImm() * Scale);
139	break;
140	}
141	case X86::VSHUFF32X4Z256rmi:
142	case X86::VSHUFF32X4Z256rri:
143	case X86::VSHUFF64X2Z256rmi:
144	case X86::VSHUFF64X2Z256rri:
145	case X86::VSHUFI32X4Z256rmi:
146	case X86::VSHUFI32X4Z256rri:
147	case X86::VSHUFI64X2Z256rmi:
148	case X86::VSHUFI64X2Z256rri: {
149	assert((NewOpc == X86::VPERM2F128rr \|\| NewOpc == X86::VPERM2I128rr \|\|
150	NewOpc == X86::VPERM2F128rm \|\| NewOpc == X86::VPERM2I128rm) &&
151	"Unexpected new opcode!");
152	MachineOperand &Imm = MI.getOperand(i: MI.getNumExplicitOperands() - `1`);
153	int64_t ImmVal = Imm.getImm();
154	// Set bit 5, move bit 1 to bit 4, copy bit 0.
155	Imm.setImm(`0x20` \| ((ImmVal & `2`) << `3`) \| (ImmVal & `1`));
156	break;
157	}
158	case X86::VRNDSCALEPDZ128rri:
159	case X86::VRNDSCALEPDZ128rmi:
160	case X86::VRNDSCALEPSZ128rri:
161	case X86::VRNDSCALEPSZ128rmi:
162	case X86::VRNDSCALEPDZ256rri:
163	case X86::VRNDSCALEPDZ256rmi:
164	case X86::VRNDSCALEPSZ256rri:
165	case X86::VRNDSCALEPSZ256rmi:
166	case X86::VRNDSCALESDZr:
167	case X86::VRNDSCALESDZm:
168	case X86::VRNDSCALESSZr:
169	case X86::VRNDSCALESSZm:
170	case X86::VRNDSCALESDZr_Int:
171	case X86::VRNDSCALESDZm_Int:
172	case X86::VRNDSCALESSZr_Int:
173	case X86::VRNDSCALESSZm_Int:
174	const MachineOperand &Imm = MI.getOperand(i: MI.getNumExplicitOperands() - `1`);
175	int64_t ImmVal = Imm.getImm();
176	// Ensure that only bits 3:0 of the immediate are used.
177	if ((ImmVal & `0xf`) != ImmVal)
178	return false;
179	break;
180	}
181
182	return true;
183	}
184
185	static bool isRedundantNewDataDest(MachineInstr &MI, const X86Subtarget &ST) {
186	// $rbx = ADD64rr_ND $rbx, $rax / $rbx = ADD64rr_ND $rax, $rbx
187	// ->
188	// $rbx = ADD64rr $rbx, $rax
189	const MCInstrDesc &Desc = MI.getDesc();
190	Register Reg0 = MI.getOperand(i: `0`).getReg();
191	const MachineOperand &Op1 = MI.getOperand(i: `1`);
192	if (!Op1.isReg() \|\| X86::getFirstAddrOperandIdx(MI) == `1`)
193	return false;
194	Register Reg1 = Op1.getReg();
195	if (Reg1 == Reg0)
196	return true;
197
198	// Op1 and Op2 may be commutable for ND instructions.
199	if (!Desc.isCommutable() \|\| Desc.getNumOperands() < `3` \|\|
200	!MI.getOperand(i: `2`).isReg() \|\| MI.getOperand(i: `2`).getReg() != Reg0)
201	return false;
202	// Opcode may change after commute, e.g. SHRD -> SHLD
203	ST.getInstrInfo()->commuteInstruction(MI, NewMI: false, CommuteOpIdx1: `1`, CommuteOpIdx2: `2`);
204	return true;
205	}
206
207	static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
208	uint64_t TSFlags = MI.getDesc().TSFlags;
209
210	// Check for EVEX instructions only.
211	if ((TSFlags & X86II::EncodingMask) != X86II::EVEX)
212	return false;
213
214	// Instructions with mask or 512-bit vector can't be converted to VEX.
215	if (TSFlags & (X86II::EVEX_K \| X86II::EVEX_L2))
216	return false;
217
218	// EVEX_B has several meanings.
219	// AVX512:
220	// register form: rounding control or SAE
221	// memory form: broadcast
222	//
223	// APX:
224	// MAP4: NDD
225	//
226	// For AVX512 cases, EVEX prefix is needed in order to carry this information
227	// thus preventing the transformation to VEX encoding.
228	unsigned Opc = MI.getOpcode();
229	bool IsND = X86II::hasNewDataDest(TSFlags);
230	if (TSFlags & X86II::EVEX_B && !IsND)
231	return false;
232	// MOVBErr is special because it has semantic of NDD but not set EVEX_B.*
233	bool IsNDLike = IsND \|\| Opc == X86::MOVBE32rr \|\| Opc == X86::MOVBE64rr;
234	if (IsNDLike && !isRedundantNewDataDest(MI, ST))
235	return false;
236
237	ArrayRef<X86CompressEVEXTableEntry> Table = ArrayRef(X86CompressEVEXTable);
238
239	Opc = MI.getOpcode();
240	const auto *I = llvm::lower_bound(Range&: Table, Value&: Opc);
241	if (I == Table.end() \|\| I->OldOpc != Opc) {
242	assert(!IsNDLike && "Missing entry for ND-like instruction");
243	return false;
244	}
245
246	if (!IsNDLike) {
247	if (usesExtendedRegister(MI) \|\| !checkPredicate(I->NewOpc, &ST) \|\|
248	!performCustomAdjustments(MI, I->NewOpc))
249	return false;
250	}
251
252	const MCInstrDesc &NewDesc = ST.getInstrInfo()->get(I->NewOpc);
253	MI.setDesc(NewDesc);
254	unsigned AsmComment;
255	switch (NewDesc.TSFlags & X86II::EncodingMask) {
256	case X86II::LEGACY:
257	AsmComment = X86::AC_EVEX_2_LEGACY;
258	break;
259	case X86II::VEX:
260	AsmComment = X86::AC_EVEX_2_VEX;
261	break;
262	case X86II::EVEX:
263	AsmComment = X86::AC_EVEX_2_EVEX;
264	assert(IsND && (NewDesc.TSFlags & X86II::EVEX_NF) &&
265	"Unknown EVEX2EVEX compression");
266	break;
267	default:
268	llvm_unreachable("Unknown EVEX compression");
269	}
270	MI.setAsmPrinterFlag(AsmComment);
271	if (IsNDLike)
272	MI.tieOperands(DefIdx: `0`, UseIdx: `1`);
273
274	return true;
275	}
276
277	bool CompressEVEXPass::runOnMachineFunction(MachineFunction &MF) {
278	#ifndef NDEBUG
279	// Make sure the tables are sorted.
280	static std::atomic<bool> TableChecked(false);
281	if (!TableChecked.load(m: std::memory_order_relaxed)) {
282	assert(llvm::is_sorted(X86CompressEVEXTable) &&
283	"X86CompressEVEXTable is not sorted!");
284	TableChecked.store(i: true, m: std::memory_order_relaxed);
285	}
286	#endif
287	const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
288	if (!ST.hasAVX512() && !ST.hasEGPR() && !ST.hasNDD())
289	return false;
290
291	bool Changed = false;
292
293	for (MachineBasicBlock &MBB : MF) {
294	// Traverse the basic block.
295	for (MachineInstr &MI : MBB)
296	Changed \|= CompressEVEXImpl(MI, ST);
297	}
298
299	return Changed;
300	}
301
302	INITIALIZE_PASS(CompressEVEXPass, COMP_EVEX_NAME, COMP_EVEX_DESC, false, false)
303
304	FunctionPass *llvm::createX86CompressEVEXPass() {
305	return new CompressEVEXPass ();
306	}
307

source code of llvm/lib/Target/X86/X86CompressEVEX.cpp