1 | //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains code to lower X86 MachineInstrs to their corresponding |
10 | // MCInst records. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "MCTargetDesc/X86ATTInstPrinter.h" |
15 | #include "MCTargetDesc/X86BaseInfo.h" |
16 | #include "MCTargetDesc/X86EncodingOptimization.h" |
17 | #include "MCTargetDesc/X86InstComments.h" |
18 | #include "MCTargetDesc/X86ShuffleDecode.h" |
19 | #include "MCTargetDesc/X86TargetStreamer.h" |
20 | #include "X86AsmPrinter.h" |
21 | #include "X86MachineFunctionInfo.h" |
22 | #include "X86RegisterInfo.h" |
23 | #include "X86ShuffleDecodeConstantPool.h" |
24 | #include "X86Subtarget.h" |
25 | #include "llvm/ADT/SmallString.h" |
26 | #include "llvm/ADT/StringExtras.h" |
27 | #include "llvm/CodeGen/MachineConstantPool.h" |
28 | #include "llvm/CodeGen/MachineFunction.h" |
29 | #include "llvm/CodeGen/MachineModuleInfoImpls.h" |
30 | #include "llvm/CodeGen/MachineOperand.h" |
31 | #include "llvm/CodeGen/StackMaps.h" |
32 | #include "llvm/IR/DataLayout.h" |
33 | #include "llvm/IR/GlobalValue.h" |
34 | #include "llvm/IR/Mangler.h" |
35 | #include "llvm/MC/MCAsmInfo.h" |
36 | #include "llvm/MC/MCCodeEmitter.h" |
37 | #include "llvm/MC/MCContext.h" |
38 | #include "llvm/MC/MCExpr.h" |
39 | #include "llvm/MC/MCFixup.h" |
40 | #include "llvm/MC/MCInst.h" |
41 | #include "llvm/MC/MCInstBuilder.h" |
42 | #include "llvm/MC/MCSection.h" |
43 | #include "llvm/MC/MCSectionELF.h" |
44 | #include "llvm/MC/MCStreamer.h" |
45 | #include "llvm/MC/MCSymbol.h" |
46 | #include "llvm/MC/MCSymbolELF.h" |
47 | #include "llvm/MC/TargetRegistry.h" |
48 | #include "llvm/Target/TargetLoweringObjectFile.h" |
49 | #include "llvm/Target/TargetMachine.h" |
50 | #include "llvm/Transforms/Instrumentation/AddressSanitizer.h" |
51 | #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" |
52 | #include <string> |
53 | |
54 | using namespace llvm; |
55 | |
56 | namespace { |
57 | |
58 | /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. |
59 | class X86MCInstLower { |
60 | MCContext &Ctx; |
61 | const MachineFunction &MF; |
62 | const TargetMachine &TM; |
63 | const MCAsmInfo &MAI; |
64 | X86AsmPrinter &AsmPrinter; |
65 | |
66 | public: |
67 | X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter); |
68 | |
69 | std::optional<MCOperand> LowerMachineOperand(const MachineInstr *MI, |
70 | const MachineOperand &MO) const; |
71 | void Lower(const MachineInstr *MI, MCInst &OutMI) const; |
72 | |
73 | MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; |
74 | MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; |
75 | |
76 | private: |
77 | MachineModuleInfoMachO &getMachOMMI() const; |
78 | }; |
79 | |
80 | } // end anonymous namespace |
81 | |
82 | /// A RAII helper which defines a region of instructions which can't have |
83 | /// padding added between them for correctness. |
84 | struct NoAutoPaddingScope { |
85 | MCStreamer &OS; |
86 | const bool OldAllowAutoPadding; |
87 | NoAutoPaddingScope(MCStreamer &OS) |
88 | : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) { |
89 | changeAndComment(b: false); |
90 | } |
91 | ~NoAutoPaddingScope() { changeAndComment(b: OldAllowAutoPadding); } |
92 | void changeAndComment(bool b) { |
93 | if (b == OS.getAllowAutoPadding()) |
94 | return; |
95 | OS.setAllowAutoPadding(b); |
96 | if (b) |
97 | OS.emitRawComment(T: "autopadding" ); |
98 | else |
99 | OS.emitRawComment(T: "noautopadding" ); |
100 | } |
101 | }; |
102 | |
103 | // Emit a minimal sequence of nops spanning NumBytes bytes. |
104 | static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, |
105 | const X86Subtarget *Subtarget); |
106 | |
107 | void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, |
108 | const MCSubtargetInfo &STI, |
109 | MCCodeEmitter *CodeEmitter) { |
110 | if (InShadow) { |
111 | SmallString<256> Code; |
112 | SmallVector<MCFixup, 4> Fixups; |
113 | CodeEmitter->encodeInstruction(Inst, CB&: Code, Fixups, STI); |
114 | CurrentShadowSize += Code.size(); |
115 | if (CurrentShadowSize >= RequiredShadowSize) |
116 | InShadow = false; // The shadow is big enough. Stop counting. |
117 | } |
118 | } |
119 | |
120 | void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding( |
121 | MCStreamer &OutStreamer, const MCSubtargetInfo &STI) { |
122 | if (InShadow && CurrentShadowSize < RequiredShadowSize) { |
123 | InShadow = false; |
124 | emitX86Nops(OS&: OutStreamer, NumBytes: RequiredShadowSize - CurrentShadowSize, |
125 | Subtarget: &MF->getSubtarget<X86Subtarget>()); |
126 | } |
127 | } |
128 | |
129 | void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { |
130 | OutStreamer->emitInstruction(Inst, STI: getSubtargetInfo()); |
131 | SMShadowTracker.count(Inst, STI: getSubtargetInfo(), CodeEmitter: CodeEmitter.get()); |
132 | } |
133 | |
134 | X86MCInstLower::X86MCInstLower(const MachineFunction &mf, |
135 | X86AsmPrinter &asmprinter) |
136 | : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()), |
137 | AsmPrinter(asmprinter) {} |
138 | |
139 | MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { |
140 | return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>(); |
141 | } |
142 | |
143 | /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol |
144 | /// operand to an MCSymbol. |
145 | MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const { |
146 | const Triple &TT = TM.getTargetTriple(); |
147 | if (MO.isGlobal() && TT.isOSBinFormatELF()) |
148 | return AsmPrinter.getSymbolPreferLocal(GV: *MO.getGlobal()); |
149 | |
150 | const DataLayout &DL = MF.getDataLayout(); |
151 | assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && |
152 | "Isn't a symbol reference" ); |
153 | |
154 | MCSymbol *Sym = nullptr; |
155 | SmallString<128> Name; |
156 | StringRef Suffix; |
157 | |
158 | switch (MO.getTargetFlags()) { |
159 | case X86II::MO_DLLIMPORT: |
160 | // Handle dllimport linkage. |
161 | Name += "__imp_" ; |
162 | break; |
163 | case X86II::MO_COFFSTUB: |
164 | Name += ".refptr." ; |
165 | break; |
166 | case X86II::MO_DARWIN_NONLAZY: |
167 | case X86II::MO_DARWIN_NONLAZY_PIC_BASE: |
168 | Suffix = "$non_lazy_ptr" ; |
169 | break; |
170 | } |
171 | |
172 | if (!Suffix.empty()) |
173 | Name += DL.getPrivateGlobalPrefix(); |
174 | |
175 | if (MO.isGlobal()) { |
176 | const GlobalValue *GV = MO.getGlobal(); |
177 | AsmPrinter.getNameWithPrefix(Name, GV); |
178 | } else if (MO.isSymbol()) { |
179 | Mangler::getNameWithPrefix(OutName&: Name, GVName: MO.getSymbolName(), DL); |
180 | } else if (MO.isMBB()) { |
181 | assert(Suffix.empty()); |
182 | Sym = MO.getMBB()->getSymbol(); |
183 | } |
184 | |
185 | Name += Suffix; |
186 | if (!Sym) |
187 | Sym = Ctx.getOrCreateSymbol(Name); |
188 | |
189 | // If the target flags on the operand changes the name of the symbol, do that |
190 | // before we return the symbol. |
191 | switch (MO.getTargetFlags()) { |
192 | default: |
193 | break; |
194 | case X86II::MO_COFFSTUB: { |
195 | MachineModuleInfoCOFF &MMICOFF = |
196 | MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>(); |
197 | MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym); |
198 | if (!StubSym.getPointer()) { |
199 | assert(MO.isGlobal() && "Extern symbol not handled yet" ); |
200 | StubSym = MachineModuleInfoImpl::StubValueTy( |
201 | AsmPrinter.getSymbol(GV: MO.getGlobal()), true); |
202 | } |
203 | break; |
204 | } |
205 | case X86II::MO_DARWIN_NONLAZY: |
206 | case X86II::MO_DARWIN_NONLAZY_PIC_BASE: { |
207 | MachineModuleInfoImpl::StubValueTy &StubSym = |
208 | getMachOMMI().getGVStubEntry(Sym); |
209 | if (!StubSym.getPointer()) { |
210 | assert(MO.isGlobal() && "Extern symbol not handled yet" ); |
211 | StubSym = MachineModuleInfoImpl::StubValueTy( |
212 | AsmPrinter.getSymbol(GV: MO.getGlobal()), |
213 | !MO.getGlobal()->hasInternalLinkage()); |
214 | } |
215 | break; |
216 | } |
217 | } |
218 | |
219 | return Sym; |
220 | } |
221 | |
222 | MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, |
223 | MCSymbol *Sym) const { |
224 | // FIXME: We would like an efficient form for this, so we don't have to do a |
225 | // lot of extra uniquing. |
226 | const MCExpr *Expr = nullptr; |
227 | MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; |
228 | |
229 | switch (MO.getTargetFlags()) { |
230 | default: |
231 | llvm_unreachable("Unknown target flag on GV operand" ); |
232 | case X86II::MO_NO_FLAG: // No flag. |
233 | // These affect the name of the symbol, not any suffix. |
234 | case X86II::MO_DARWIN_NONLAZY: |
235 | case X86II::MO_DLLIMPORT: |
236 | case X86II::MO_COFFSTUB: |
237 | break; |
238 | |
239 | case X86II::MO_TLVP: |
240 | RefKind = MCSymbolRefExpr::VK_TLVP; |
241 | break; |
242 | case X86II::MO_TLVP_PIC_BASE: |
243 | Expr = MCSymbolRefExpr::create(Symbol: Sym, Kind: MCSymbolRefExpr::VK_TLVP, Ctx); |
244 | // Subtract the pic base. |
245 | Expr = MCBinaryExpr::createSub( |
246 | LHS: Expr, RHS: MCSymbolRefExpr::create(Symbol: MF.getPICBaseSymbol(), Ctx), Ctx); |
247 | break; |
248 | case X86II::MO_SECREL: |
249 | RefKind = MCSymbolRefExpr::VK_SECREL; |
250 | break; |
251 | case X86II::MO_TLSGD: |
252 | RefKind = MCSymbolRefExpr::VK_TLSGD; |
253 | break; |
254 | case X86II::MO_TLSLD: |
255 | RefKind = MCSymbolRefExpr::VK_TLSLD; |
256 | break; |
257 | case X86II::MO_TLSLDM: |
258 | RefKind = MCSymbolRefExpr::VK_TLSLDM; |
259 | break; |
260 | case X86II::MO_GOTTPOFF: |
261 | RefKind = MCSymbolRefExpr::VK_GOTTPOFF; |
262 | break; |
263 | case X86II::MO_INDNTPOFF: |
264 | RefKind = MCSymbolRefExpr::VK_INDNTPOFF; |
265 | break; |
266 | case X86II::MO_TPOFF: |
267 | RefKind = MCSymbolRefExpr::VK_TPOFF; |
268 | break; |
269 | case X86II::MO_DTPOFF: |
270 | RefKind = MCSymbolRefExpr::VK_DTPOFF; |
271 | break; |
272 | case X86II::MO_NTPOFF: |
273 | RefKind = MCSymbolRefExpr::VK_NTPOFF; |
274 | break; |
275 | case X86II::MO_GOTNTPOFF: |
276 | RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; |
277 | break; |
278 | case X86II::MO_GOTPCREL: |
279 | RefKind = MCSymbolRefExpr::VK_GOTPCREL; |
280 | break; |
281 | case X86II::MO_GOTPCREL_NORELAX: |
282 | RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX; |
283 | break; |
284 | case X86II::MO_GOT: |
285 | RefKind = MCSymbolRefExpr::VK_GOT; |
286 | break; |
287 | case X86II::MO_GOTOFF: |
288 | RefKind = MCSymbolRefExpr::VK_GOTOFF; |
289 | break; |
290 | case X86II::MO_PLT: |
291 | RefKind = MCSymbolRefExpr::VK_PLT; |
292 | break; |
293 | case X86II::MO_ABS8: |
294 | RefKind = MCSymbolRefExpr::VK_X86_ABS8; |
295 | break; |
296 | case X86II::MO_PIC_BASE_OFFSET: |
297 | case X86II::MO_DARWIN_NONLAZY_PIC_BASE: |
298 | Expr = MCSymbolRefExpr::create(Symbol: Sym, Ctx); |
299 | // Subtract the pic base. |
300 | Expr = MCBinaryExpr::createSub( |
301 | LHS: Expr, RHS: MCSymbolRefExpr::create(Symbol: MF.getPICBaseSymbol(), Ctx), Ctx); |
302 | if (MO.isJTI()) { |
303 | assert(MAI.doesSetDirectiveSuppressReloc()); |
304 | // If .set directive is supported, use it to reduce the number of |
305 | // relocations the assembler will generate for differences between |
306 | // local labels. This is only safe when the symbols are in the same |
307 | // section so we are restricting it to jumptable references. |
308 | MCSymbol *Label = Ctx.createTempSymbol(); |
309 | AsmPrinter.OutStreamer->emitAssignment(Symbol: Label, Value: Expr); |
310 | Expr = MCSymbolRefExpr::create(Symbol: Label, Ctx); |
311 | } |
312 | break; |
313 | } |
314 | |
315 | if (!Expr) |
316 | Expr = MCSymbolRefExpr::create(Symbol: Sym, Kind: RefKind, Ctx); |
317 | |
318 | if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) |
319 | Expr = MCBinaryExpr::createAdd( |
320 | LHS: Expr, RHS: MCConstantExpr::create(Value: MO.getOffset(), Ctx), Ctx); |
321 | return MCOperand::createExpr(Val: Expr); |
322 | } |
323 | |
324 | static unsigned getRetOpcode(const X86Subtarget &Subtarget) { |
325 | return Subtarget.is64Bit() ? X86::RET64 : X86::RET32; |
326 | } |
327 | |
328 | std::optional<MCOperand> |
329 | X86MCInstLower::LowerMachineOperand(const MachineInstr *MI, |
330 | const MachineOperand &MO) const { |
331 | switch (MO.getType()) { |
332 | default: |
333 | MI->print(OS&: errs()); |
334 | llvm_unreachable("unknown operand type" ); |
335 | case MachineOperand::MO_Register: |
336 | // Ignore all implicit register operands. |
337 | if (MO.isImplicit()) |
338 | return std::nullopt; |
339 | return MCOperand::createReg(Reg: MO.getReg()); |
340 | case MachineOperand::MO_Immediate: |
341 | return MCOperand::createImm(Val: MO.getImm()); |
342 | case MachineOperand::MO_MachineBasicBlock: |
343 | case MachineOperand::MO_GlobalAddress: |
344 | case MachineOperand::MO_ExternalSymbol: |
345 | return LowerSymbolOperand(MO, Sym: GetSymbolFromOperand(MO)); |
346 | case MachineOperand::MO_MCSymbol: |
347 | return LowerSymbolOperand(MO, Sym: MO.getMCSymbol()); |
348 | case MachineOperand::MO_JumpTableIndex: |
349 | return LowerSymbolOperand(MO, Sym: AsmPrinter.GetJTISymbol(JTID: MO.getIndex())); |
350 | case MachineOperand::MO_ConstantPoolIndex: |
351 | return LowerSymbolOperand(MO, Sym: AsmPrinter.GetCPISymbol(CPID: MO.getIndex())); |
352 | case MachineOperand::MO_BlockAddress: |
353 | return LowerSymbolOperand( |
354 | MO, Sym: AsmPrinter.GetBlockAddressSymbol(BA: MO.getBlockAddress())); |
355 | case MachineOperand::MO_RegisterMask: |
356 | // Ignore call clobbers. |
357 | return std::nullopt; |
358 | } |
359 | } |
360 | |
361 | // Replace TAILJMP opcodes with their equivalent opcodes that have encoding |
362 | // information. |
363 | static unsigned convertTailJumpOpcode(unsigned Opcode) { |
364 | switch (Opcode) { |
365 | case X86::TAILJMPr: |
366 | Opcode = X86::JMP32r; |
367 | break; |
368 | case X86::TAILJMPm: |
369 | Opcode = X86::JMP32m; |
370 | break; |
371 | case X86::TAILJMPr64: |
372 | Opcode = X86::JMP64r; |
373 | break; |
374 | case X86::TAILJMPm64: |
375 | Opcode = X86::JMP64m; |
376 | break; |
377 | case X86::TAILJMPr64_REX: |
378 | Opcode = X86::JMP64r_REX; |
379 | break; |
380 | case X86::TAILJMPm64_REX: |
381 | Opcode = X86::JMP64m_REX; |
382 | break; |
383 | case X86::TAILJMPd: |
384 | case X86::TAILJMPd64: |
385 | Opcode = X86::JMP_1; |
386 | break; |
387 | case X86::TAILJMPd_CC: |
388 | case X86::TAILJMPd64_CC: |
389 | Opcode = X86::JCC_1; |
390 | break; |
391 | } |
392 | |
393 | return Opcode; |
394 | } |
395 | |
396 | void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { |
397 | OutMI.setOpcode(MI->getOpcode()); |
398 | |
399 | for (const MachineOperand &MO : MI->operands()) |
400 | if (auto MaybeMCOp = LowerMachineOperand(MI, MO)) |
401 | OutMI.addOperand(Op: *MaybeMCOp); |
402 | |
403 | bool In64BitMode = AsmPrinter.getSubtarget().is64Bit(); |
404 | if (X86::optimizeInstFromVEX3ToVEX2(MI&: OutMI, Desc: MI->getDesc()) || |
405 | X86::optimizeShiftRotateWithImmediateOne(MI&: OutMI) || |
406 | X86::optimizeVPCMPWithImmediateOneOrSix(MI&: OutMI) || |
407 | X86::optimizeMOVSX(MI&: OutMI) || X86::optimizeINCDEC(MI&: OutMI, In64BitMode) || |
408 | X86::optimizeMOV(MI&: OutMI, In64BitMode) || |
409 | X86::optimizeToFixedRegisterOrShortImmediateForm(MI&: OutMI)) |
410 | return; |
411 | |
412 | // Handle a few special cases to eliminate operand modifiers. |
413 | switch (OutMI.getOpcode()) { |
414 | case X86::LEA64_32r: |
415 | case X86::LEA64r: |
416 | case X86::LEA16r: |
417 | case X86::LEA32r: |
418 | // LEA should have a segment register, but it must be empty. |
419 | assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands && |
420 | "Unexpected # of LEA operands" ); |
421 | assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 && |
422 | "LEA has segment specified!" ); |
423 | break; |
424 | case X86::MULX32Hrr: |
425 | case X86::MULX32Hrm: |
426 | case X86::MULX64Hrr: |
427 | case X86::MULX64Hrm: { |
428 | // Turn into regular MULX by duplicating the destination. |
429 | unsigned NewOpc; |
430 | switch (OutMI.getOpcode()) { |
431 | default: llvm_unreachable("Invalid opcode" ); |
432 | case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break; |
433 | case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break; |
434 | case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break; |
435 | case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break; |
436 | } |
437 | OutMI.setOpcode(NewOpc); |
438 | // Duplicate the destination. |
439 | unsigned DestReg = OutMI.getOperand(i: 0).getReg(); |
440 | OutMI.insert(I: OutMI.begin(), Op: MCOperand::createReg(Reg: DestReg)); |
441 | break; |
442 | } |
443 | // CALL64r, CALL64pcrel32 - These instructions used to have |
444 | // register inputs modeled as normal uses instead of implicit uses. As such, |
445 | // they we used to truncate off all but the first operand (the callee). This |
446 | // issue seems to have been fixed at some point. This assert verifies that. |
447 | case X86::CALL64r: |
448 | case X86::CALL64pcrel32: |
449 | assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!" ); |
450 | break; |
451 | case X86::EH_RETURN: |
452 | case X86::EH_RETURN64: { |
453 | OutMI = MCInst(); |
454 | OutMI.setOpcode(getRetOpcode(Subtarget: AsmPrinter.getSubtarget())); |
455 | break; |
456 | } |
457 | case X86::CLEANUPRET: { |
458 | // Replace CLEANUPRET with the appropriate RET. |
459 | OutMI = MCInst(); |
460 | OutMI.setOpcode(getRetOpcode(Subtarget: AsmPrinter.getSubtarget())); |
461 | break; |
462 | } |
463 | case X86::CATCHRET: { |
464 | // Replace CATCHRET with the appropriate RET. |
465 | const X86Subtarget &Subtarget = AsmPrinter.getSubtarget(); |
466 | unsigned ReturnReg = In64BitMode ? X86::RAX : X86::EAX; |
467 | OutMI = MCInst(); |
468 | OutMI.setOpcode(getRetOpcode(Subtarget)); |
469 | OutMI.addOperand(Op: MCOperand::createReg(Reg: ReturnReg)); |
470 | break; |
471 | } |
472 | // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump |
473 | // instruction. |
474 | case X86::TAILJMPr: |
475 | case X86::TAILJMPr64: |
476 | case X86::TAILJMPr64_REX: |
477 | case X86::TAILJMPd: |
478 | case X86::TAILJMPd64: |
479 | assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!" ); |
480 | OutMI.setOpcode(convertTailJumpOpcode(Opcode: OutMI.getOpcode())); |
481 | break; |
482 | case X86::TAILJMPd_CC: |
483 | case X86::TAILJMPd64_CC: |
484 | assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!" ); |
485 | OutMI.setOpcode(convertTailJumpOpcode(Opcode: OutMI.getOpcode())); |
486 | break; |
487 | case X86::TAILJMPm: |
488 | case X86::TAILJMPm64: |
489 | case X86::TAILJMPm64_REX: |
490 | assert(OutMI.getNumOperands() == X86::AddrNumOperands && |
491 | "Unexpected number of operands!" ); |
492 | OutMI.setOpcode(convertTailJumpOpcode(Opcode: OutMI.getOpcode())); |
493 | break; |
494 | case X86::MASKMOVDQU: |
495 | case X86::VMASKMOVDQU: |
496 | if (In64BitMode) |
497 | OutMI.setFlags(X86::IP_HAS_AD_SIZE); |
498 | break; |
499 | case X86::BSF16rm: |
500 | case X86::BSF16rr: |
501 | case X86::BSF32rm: |
502 | case X86::BSF32rr: |
503 | case X86::BSF64rm: |
504 | case X86::BSF64rr: { |
505 | // Add an REP prefix to BSF instructions so that new processors can |
506 | // recognize as TZCNT, which has better performance than BSF. |
507 | // BSF and TZCNT have different interpretations on ZF bit. So make sure |
508 | // it won't be used later. |
509 | const MachineOperand *FlagDef = MI->findRegisterDefOperand(X86::EFLAGS); |
510 | if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead()) |
511 | OutMI.setFlags(X86::IP_HAS_REPEAT); |
512 | break; |
513 | } |
514 | default: |
515 | break; |
516 | } |
517 | } |
518 | |
519 | void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, |
520 | const MachineInstr &MI) { |
521 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
522 | bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 && |
523 | MI.getOpcode() != X86::TLS_base_addr32; |
524 | bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 || |
525 | MI.getOpcode() == X86::TLS_base_addr64; |
526 | MCContext &Ctx = OutStreamer->getContext(); |
527 | |
528 | MCSymbolRefExpr::VariantKind SRVK; |
529 | switch (MI.getOpcode()) { |
530 | case X86::TLS_addr32: |
531 | case X86::TLS_addr64: |
532 | case X86::TLS_addrX32: |
533 | SRVK = MCSymbolRefExpr::VK_TLSGD; |
534 | break; |
535 | case X86::TLS_base_addr32: |
536 | SRVK = MCSymbolRefExpr::VK_TLSLDM; |
537 | break; |
538 | case X86::TLS_base_addr64: |
539 | case X86::TLS_base_addrX32: |
540 | SRVK = MCSymbolRefExpr::VK_TLSLD; |
541 | break; |
542 | default: |
543 | llvm_unreachable("unexpected opcode" ); |
544 | } |
545 | |
546 | const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create( |
547 | Symbol: MCInstLowering.GetSymbolFromOperand(MO: MI.getOperand(i: 3)), Kind: SRVK, Ctx); |
548 | |
549 | // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD |
550 | // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is |
551 | // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by |
552 | // only using GOT when GOTPCRELX is enabled. |
553 | // TODO Delete the workaround when GOTPCRELX becomes commonplace. |
554 | bool UseGot = MMI->getModule()->getRtLibUseGOT() && |
555 | Ctx.getAsmInfo()->canRelaxRelocations(); |
556 | |
557 | if (Is64Bits) { |
558 | bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD; |
559 | if (NeedsPadding && Is64BitsLP64) |
560 | EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); |
561 | EmitAndCountInstruction(MCInstBuilder(X86::LEA64r) |
562 | .addReg(X86::RDI) |
563 | .addReg(X86::RIP) |
564 | .addImm(1) |
565 | .addReg(0) |
566 | .addExpr(Sym) |
567 | .addReg(0)); |
568 | const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol(Name: "__tls_get_addr" ); |
569 | if (NeedsPadding) { |
570 | if (!UseGot) |
571 | EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); |
572 | EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); |
573 | EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX)); |
574 | } |
575 | if (UseGot) { |
576 | const MCExpr *Expr = MCSymbolRefExpr::create( |
577 | Symbol: TlsGetAddr, Kind: MCSymbolRefExpr::VK_GOTPCREL, Ctx); |
578 | EmitAndCountInstruction(MCInstBuilder(X86::CALL64m) |
579 | .addReg(X86::RIP) |
580 | .addImm(1) |
581 | .addReg(0) |
582 | .addExpr(Expr) |
583 | .addReg(0)); |
584 | } else { |
585 | EmitAndCountInstruction( |
586 | MCInstBuilder(X86::CALL64pcrel32) |
587 | .addExpr(MCSymbolRefExpr::create(TlsGetAddr, |
588 | MCSymbolRefExpr::VK_PLT, Ctx))); |
589 | } |
590 | } else { |
591 | if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) { |
592 | EmitAndCountInstruction(MCInstBuilder(X86::LEA32r) |
593 | .addReg(X86::EAX) |
594 | .addReg(0) |
595 | .addImm(1) |
596 | .addReg(X86::EBX) |
597 | .addExpr(Sym) |
598 | .addReg(0)); |
599 | } else { |
600 | EmitAndCountInstruction(MCInstBuilder(X86::LEA32r) |
601 | .addReg(X86::EAX) |
602 | .addReg(X86::EBX) |
603 | .addImm(1) |
604 | .addReg(0) |
605 | .addExpr(Sym) |
606 | .addReg(0)); |
607 | } |
608 | |
609 | const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol(Name: "___tls_get_addr" ); |
610 | if (UseGot) { |
611 | const MCExpr *Expr = |
612 | MCSymbolRefExpr::create(Symbol: TlsGetAddr, Kind: MCSymbolRefExpr::VK_GOT, Ctx); |
613 | EmitAndCountInstruction(MCInstBuilder(X86::CALL32m) |
614 | .addReg(X86::EBX) |
615 | .addImm(1) |
616 | .addReg(0) |
617 | .addExpr(Expr) |
618 | .addReg(0)); |
619 | } else { |
620 | EmitAndCountInstruction( |
621 | MCInstBuilder(X86::CALLpcrel32) |
622 | .addExpr(MCSymbolRefExpr::create(TlsGetAddr, |
623 | MCSymbolRefExpr::VK_PLT, Ctx))); |
624 | } |
625 | } |
626 | } |
627 | |
628 | /// Emit the largest nop instruction smaller than or equal to \p NumBytes |
629 | /// bytes. Return the size of nop emitted. |
630 | static unsigned emitNop(MCStreamer &OS, unsigned NumBytes, |
631 | const X86Subtarget *Subtarget) { |
632 | // Determine the longest nop which can be efficiently decoded for the given |
633 | // target cpu. 15-bytes is the longest single NOP instruction, but some |
634 | // platforms can't decode the longest forms efficiently. |
635 | unsigned MaxNopLength = 1; |
636 | if (Subtarget->is64Bit()) { |
637 | // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the |
638 | // IndexReg/BaseReg below need to be updated. |
639 | if (Subtarget->hasFeature(X86::TuningFast7ByteNOP)) |
640 | MaxNopLength = 7; |
641 | else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP)) |
642 | MaxNopLength = 15; |
643 | else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP)) |
644 | MaxNopLength = 11; |
645 | else |
646 | MaxNopLength = 10; |
647 | } if (Subtarget->is32Bit()) |
648 | MaxNopLength = 2; |
649 | |
650 | // Cap a single nop emission at the profitable value for the target |
651 | NumBytes = std::min(a: NumBytes, b: MaxNopLength); |
652 | |
653 | unsigned NopSize; |
654 | unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg; |
655 | IndexReg = Displacement = SegmentReg = 0; |
656 | BaseReg = X86::RAX; |
657 | ScaleVal = 1; |
658 | switch (NumBytes) { |
659 | case 0: |
660 | llvm_unreachable("Zero nops?" ); |
661 | break; |
662 | case 1: |
663 | NopSize = 1; |
664 | Opc = X86::NOOP; |
665 | break; |
666 | case 2: |
667 | NopSize = 2; |
668 | Opc = X86::XCHG16ar; |
669 | break; |
670 | case 3: |
671 | NopSize = 3; |
672 | Opc = X86::NOOPL; |
673 | break; |
674 | case 4: |
675 | NopSize = 4; |
676 | Opc = X86::NOOPL; |
677 | Displacement = 8; |
678 | break; |
679 | case 5: |
680 | NopSize = 5; |
681 | Opc = X86::NOOPL; |
682 | Displacement = 8; |
683 | IndexReg = X86::RAX; |
684 | break; |
685 | case 6: |
686 | NopSize = 6; |
687 | Opc = X86::NOOPW; |
688 | Displacement = 8; |
689 | IndexReg = X86::RAX; |
690 | break; |
691 | case 7: |
692 | NopSize = 7; |
693 | Opc = X86::NOOPL; |
694 | Displacement = 512; |
695 | break; |
696 | case 8: |
697 | NopSize = 8; |
698 | Opc = X86::NOOPL; |
699 | Displacement = 512; |
700 | IndexReg = X86::RAX; |
701 | break; |
702 | case 9: |
703 | NopSize = 9; |
704 | Opc = X86::NOOPW; |
705 | Displacement = 512; |
706 | IndexReg = X86::RAX; |
707 | break; |
708 | default: |
709 | NopSize = 10; |
710 | Opc = X86::NOOPW; |
711 | Displacement = 512; |
712 | IndexReg = X86::RAX; |
713 | SegmentReg = X86::CS; |
714 | break; |
715 | } |
716 | |
717 | unsigned NumPrefixes = std::min(a: NumBytes - NopSize, b: 5U); |
718 | NopSize += NumPrefixes; |
719 | for (unsigned i = 0; i != NumPrefixes; ++i) |
720 | OS.emitBytes(Data: "\x66" ); |
721 | |
722 | switch (Opc) { |
723 | default: llvm_unreachable("Unexpected opcode" ); |
724 | case X86::NOOP: |
725 | OS.emitInstruction(MCInstBuilder(Opc), *Subtarget); |
726 | break; |
727 | case X86::XCHG16ar: |
728 | OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), |
729 | *Subtarget); |
730 | break; |
731 | case X86::NOOPL: |
732 | case X86::NOOPW: |
733 | OS.emitInstruction(MCInstBuilder(Opc) |
734 | .addReg(Reg: BaseReg) |
735 | .addImm(Val: ScaleVal) |
736 | .addReg(Reg: IndexReg) |
737 | .addImm(Val: Displacement) |
738 | .addReg(Reg: SegmentReg), |
739 | *Subtarget); |
740 | break; |
741 | } |
742 | assert(NopSize <= NumBytes && "We overemitted?" ); |
743 | return NopSize; |
744 | } |
745 | |
746 | /// Emit the optimal amount of multi-byte nops on X86. |
747 | static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, |
748 | const X86Subtarget *Subtarget) { |
749 | unsigned NopsToEmit = NumBytes; |
750 | (void)NopsToEmit; |
751 | while (NumBytes) { |
752 | NumBytes -= emitNop(OS, NumBytes, Subtarget); |
753 | assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!" ); |
754 | } |
755 | } |
756 | |
757 | void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI, |
758 | X86MCInstLower &MCIL) { |
759 | assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64" ); |
760 | |
761 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
762 | |
763 | StatepointOpers SOpers(&MI); |
764 | if (unsigned PatchBytes = SOpers.getNumPatchBytes()) { |
765 | emitX86Nops(OS&: *OutStreamer, NumBytes: PatchBytes, Subtarget); |
766 | } else { |
767 | // Lower call target and choose correct opcode |
768 | const MachineOperand &CallTarget = SOpers.getCallTarget(); |
769 | MCOperand CallTargetMCOp; |
770 | unsigned CallOpcode; |
771 | switch (CallTarget.getType()) { |
772 | case MachineOperand::MO_GlobalAddress: |
773 | case MachineOperand::MO_ExternalSymbol: |
774 | CallTargetMCOp = MCIL.LowerSymbolOperand( |
775 | MO: CallTarget, Sym: MCIL.GetSymbolFromOperand(MO: CallTarget)); |
776 | CallOpcode = X86::CALL64pcrel32; |
777 | // Currently, we only support relative addressing with statepoints. |
778 | // Otherwise, we'll need a scratch register to hold the target |
779 | // address. You'll fail asserts during load & relocation if this |
780 | // symbol is to far away. (TODO: support non-relative addressing) |
781 | break; |
782 | case MachineOperand::MO_Immediate: |
783 | CallTargetMCOp = MCOperand::createImm(Val: CallTarget.getImm()); |
784 | CallOpcode = X86::CALL64pcrel32; |
785 | // Currently, we only support relative addressing with statepoints. |
786 | // Otherwise, we'll need a scratch register to hold the target |
787 | // immediate. You'll fail asserts during load & relocation if this |
788 | // address is to far away. (TODO: support non-relative addressing) |
789 | break; |
790 | case MachineOperand::MO_Register: |
791 | // FIXME: Add retpoline support and remove this. |
792 | if (Subtarget->useIndirectThunkCalls()) |
793 | report_fatal_error(reason: "Lowering register statepoints with thunks not " |
794 | "yet implemented." ); |
795 | CallTargetMCOp = MCOperand::createReg(Reg: CallTarget.getReg()); |
796 | CallOpcode = X86::CALL64r; |
797 | break; |
798 | default: |
799 | llvm_unreachable("Unsupported operand type in statepoint call target" ); |
800 | break; |
801 | } |
802 | |
803 | // Emit call |
804 | MCInst CallInst; |
805 | CallInst.setOpcode(CallOpcode); |
806 | CallInst.addOperand(Op: CallTargetMCOp); |
807 | OutStreamer->emitInstruction(Inst: CallInst, STI: getSubtargetInfo()); |
808 | } |
809 | |
810 | // Record our statepoint node in the same section used by STACKMAP |
811 | // and PATCHPOINT |
812 | auto &Ctx = OutStreamer->getContext(); |
813 | MCSymbol *MILabel = Ctx.createTempSymbol(); |
814 | OutStreamer->emitLabel(Symbol: MILabel); |
815 | SM.recordStatepoint(L: *MILabel, MI); |
816 | } |
817 | |
818 | void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI, |
819 | X86MCInstLower &MCIL) { |
820 | // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>, |
821 | // <opcode>, <operands> |
822 | |
823 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
824 | |
825 | Register DefRegister = FaultingMI.getOperand(i: 0).getReg(); |
826 | FaultMaps::FaultKind FK = |
827 | static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(i: 1).getImm()); |
828 | MCSymbol *HandlerLabel = FaultingMI.getOperand(i: 2).getMBB()->getSymbol(); |
829 | unsigned Opcode = FaultingMI.getOperand(i: 3).getImm(); |
830 | unsigned OperandsBeginIdx = 4; |
831 | |
832 | auto &Ctx = OutStreamer->getContext(); |
833 | MCSymbol *FaultingLabel = Ctx.createTempSymbol(); |
834 | OutStreamer->emitLabel(Symbol: FaultingLabel); |
835 | |
836 | assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!" ); |
837 | FM.recordFaultingOp(FaultTy: FK, FaultingLabel, HandlerLabel); |
838 | |
839 | MCInst MI; |
840 | MI.setOpcode(Opcode); |
841 | |
842 | if (DefRegister != X86::NoRegister) |
843 | MI.addOperand(Op: MCOperand::createReg(Reg: DefRegister)); |
844 | |
845 | for (const MachineOperand &MO : |
846 | llvm::drop_begin(RangeOrContainer: FaultingMI.operands(), N: OperandsBeginIdx)) |
847 | if (auto MaybeOperand = MCIL.LowerMachineOperand(MI: &FaultingMI, MO)) |
848 | MI.addOperand(Op: *MaybeOperand); |
849 | |
850 | OutStreamer->AddComment(T: "on-fault: " + HandlerLabel->getName()); |
851 | OutStreamer->emitInstruction(Inst: MI, STI: getSubtargetInfo()); |
852 | } |
853 | |
854 | void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, |
855 | X86MCInstLower &MCIL) { |
856 | bool Is64Bits = Subtarget->is64Bit(); |
857 | MCContext &Ctx = OutStreamer->getContext(); |
858 | MCSymbol *fentry = Ctx.getOrCreateSymbol(Name: "__fentry__" ); |
859 | const MCSymbolRefExpr *Op = |
860 | MCSymbolRefExpr::create(Symbol: fentry, Kind: MCSymbolRefExpr::VK_None, Ctx); |
861 | |
862 | EmitAndCountInstruction( |
863 | MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32) |
864 | .addExpr(Op)); |
865 | } |
866 | |
867 | void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) { |
868 | assert(std::next(MI.getIterator())->isCall() && |
869 | "KCFI_CHECK not followed by a call instruction" ); |
870 | |
871 | // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop() |
872 | // returns a 1-byte X86::NOOP, which means the offset is the same in |
873 | // bytes. This assumes that patchable-function-prefix is the same for all |
874 | // functions. |
875 | const MachineFunction &MF = *MI.getMF(); |
876 | int64_t PrefixNops = 0; |
877 | (void)MF.getFunction() |
878 | .getFnAttribute(Kind: "patchable-function-prefix" ) |
879 | .getValueAsString() |
880 | .getAsInteger(Radix: 10, Result&: PrefixNops); |
881 | |
882 | // KCFI allows indirect calls to any location that's preceded by a valid |
883 | // type identifier. To avoid encoding the full constant into an instruction, |
884 | // and thus emitting potential call target gadgets at each indirect call |
885 | // site, load a negated constant to a register and compare that to the |
886 | // expected value at the call target. |
887 | const Register AddrReg = MI.getOperand(i: 0).getReg(); |
888 | const uint32_t Type = MI.getOperand(i: 1).getImm(); |
889 | // The check is immediately before the call. If the call target is in R10, |
890 | // we can clobber R11 for the check instead. |
891 | unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D; |
892 | EmitAndCountInstruction( |
893 | MCInstBuilder(X86::MOV32ri).addReg(TempReg).addImm(-MaskKCFIType(Type))); |
894 | EmitAndCountInstruction(MCInstBuilder(X86::ADD32rm) |
895 | .addReg(X86::NoRegister) |
896 | .addReg(TempReg) |
897 | .addReg(AddrReg) |
898 | .addImm(1) |
899 | .addReg(X86::NoRegister) |
900 | .addImm(-(PrefixNops + 4)) |
901 | .addReg(X86::NoRegister)); |
902 | |
903 | MCSymbol *Pass = OutContext.createTempSymbol(); |
904 | EmitAndCountInstruction( |
905 | MCInstBuilder(X86::JCC_1) |
906 | .addExpr(MCSymbolRefExpr::create(Pass, OutContext)) |
907 | .addImm(X86::COND_E)); |
908 | |
909 | MCSymbol *Trap = OutContext.createTempSymbol(); |
910 | OutStreamer->emitLabel(Symbol: Trap); |
911 | EmitAndCountInstruction(MCInstBuilder(X86::TRAP)); |
912 | emitKCFITrapEntry(MF, Symbol: Trap); |
913 | OutStreamer->emitLabel(Symbol: Pass); |
914 | } |
915 | |
916 | void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) { |
917 | // FIXME: Make this work on non-ELF. |
918 | if (!TM.getTargetTriple().isOSBinFormatELF()) { |
919 | report_fatal_error(reason: "llvm.asan.check.memaccess only supported on ELF" ); |
920 | return; |
921 | } |
922 | |
923 | const auto &Reg = MI.getOperand(i: 0).getReg(); |
924 | ASanAccessInfo AccessInfo(MI.getOperand(i: 1).getImm()); |
925 | |
926 | uint64_t ShadowBase; |
927 | int MappingScale; |
928 | bool OrShadowOffset; |
929 | getAddressSanitizerParams(TargetTriple: Triple(TM.getTargetTriple()), LongSize: 64, |
930 | IsKasan: AccessInfo.CompileKernel, ShadowBase: &ShadowBase, |
931 | MappingScale: &MappingScale, OrShadowOffset: &OrShadowOffset); |
932 | |
933 | StringRef Name = AccessInfo.IsWrite ? "store" : "load" ; |
934 | StringRef Op = OrShadowOffset ? "or" : "add" ; |
935 | std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" + |
936 | Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" + |
937 | TM.getMCRegisterInfo()->getName(RegNo: Reg.asMCReg())) |
938 | .str(); |
939 | if (OrShadowOffset) |
940 | report_fatal_error( |
941 | reason: "OrShadowOffset is not supported with optimized callbacks" ); |
942 | |
943 | EmitAndCountInstruction( |
944 | MCInstBuilder(X86::CALL64pcrel32) |
945 | .addExpr(MCSymbolRefExpr::create( |
946 | OutContext.getOrCreateSymbol(SymName), OutContext))); |
947 | } |
948 | |
949 | void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI, |
950 | X86MCInstLower &MCIL) { |
951 | // PATCHABLE_OP minsize |
952 | |
953 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
954 | |
955 | auto NextMI = std::find_if(first: std::next(x: MI.getIterator()), |
956 | last: MI.getParent()->end().getInstrIterator(), |
957 | pred: [](auto &II) { return !II.isMetaInstruction(); }); |
958 | |
959 | SmallString<256> Code; |
960 | unsigned MinSize = MI.getOperand(i: 0).getImm(); |
961 | |
962 | if (NextMI != MI.getParent()->end()) { |
963 | // Lower the next MachineInstr to find its byte size. |
964 | MCInst MCI; |
965 | MCIL.Lower(MI: &*NextMI, OutMI&: MCI); |
966 | |
967 | SmallVector<MCFixup, 4> Fixups; |
968 | CodeEmitter->encodeInstruction(Inst: MCI, CB&: Code, Fixups, STI: getSubtargetInfo()); |
969 | } |
970 | |
971 | if (Code.size() < MinSize) { |
972 | if (MinSize == 2 && Subtarget->is32Bit() && |
973 | Subtarget->isTargetWindowsMSVC() && |
974 | (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3" )) { |
975 | // For compatibility reasons, when targetting MSVC, it is important to |
976 | // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools |
977 | // rely specifically on this pattern to be able to patch a function. |
978 | // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE. |
979 | OutStreamer->emitInstruction( |
980 | MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI), |
981 | *Subtarget); |
982 | } else { |
983 | unsigned NopSize = emitNop(OS&: *OutStreamer, NumBytes: MinSize, Subtarget); |
984 | assert(NopSize == MinSize && "Could not implement MinSize!" ); |
985 | (void)NopSize; |
986 | } |
987 | } |
988 | } |
989 | |
990 | // Lower a stackmap of the form: |
991 | // <id>, <shadowBytes>, ... |
992 | void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { |
993 | SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo()); |
994 | |
995 | auto &Ctx = OutStreamer->getContext(); |
996 | MCSymbol *MILabel = Ctx.createTempSymbol(); |
997 | OutStreamer->emitLabel(Symbol: MILabel); |
998 | |
999 | SM.recordStackMap(L: *MILabel, MI); |
1000 | unsigned NumShadowBytes = MI.getOperand(i: 1).getImm(); |
1001 | SMShadowTracker.reset(RequiredSize: NumShadowBytes); |
1002 | } |
1003 | |
1004 | // Lower a patchpoint of the form: |
1005 | // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ... |
1006 | void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, |
1007 | X86MCInstLower &MCIL) { |
1008 | assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64" ); |
1009 | |
1010 | SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo()); |
1011 | |
1012 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1013 | |
1014 | auto &Ctx = OutStreamer->getContext(); |
1015 | MCSymbol *MILabel = Ctx.createTempSymbol(); |
1016 | OutStreamer->emitLabel(Symbol: MILabel); |
1017 | SM.recordPatchPoint(L: *MILabel, MI); |
1018 | |
1019 | PatchPointOpers opers(&MI); |
1020 | unsigned ScratchIdx = opers.getNextScratchIdx(); |
1021 | unsigned EncodedBytes = 0; |
1022 | const MachineOperand &CalleeMO = opers.getCallTarget(); |
1023 | |
1024 | // Check for null target. If target is non-null (i.e. is non-zero or is |
1025 | // symbolic) then emit a call. |
1026 | if (!(CalleeMO.isImm() && !CalleeMO.getImm())) { |
1027 | MCOperand CalleeMCOp; |
1028 | switch (CalleeMO.getType()) { |
1029 | default: |
1030 | /// FIXME: Add a verifier check for bad callee types. |
1031 | llvm_unreachable("Unrecognized callee operand type." ); |
1032 | case MachineOperand::MO_Immediate: |
1033 | if (CalleeMO.getImm()) |
1034 | CalleeMCOp = MCOperand::createImm(Val: CalleeMO.getImm()); |
1035 | break; |
1036 | case MachineOperand::MO_ExternalSymbol: |
1037 | case MachineOperand::MO_GlobalAddress: |
1038 | CalleeMCOp = MCIL.LowerSymbolOperand(MO: CalleeMO, |
1039 | Sym: MCIL.GetSymbolFromOperand(MO: CalleeMO)); |
1040 | break; |
1041 | } |
1042 | |
1043 | // Emit MOV to materialize the target address and the CALL to target. |
1044 | // This is encoded with 12-13 bytes, depending on which register is used. |
1045 | Register ScratchReg = MI.getOperand(i: ScratchIdx).getReg(); |
1046 | if (X86II::isX86_64ExtendedReg(RegNo: ScratchReg)) |
1047 | EncodedBytes = 13; |
1048 | else |
1049 | EncodedBytes = 12; |
1050 | |
1051 | EmitAndCountInstruction( |
1052 | MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp)); |
1053 | // FIXME: Add retpoline support and remove this. |
1054 | if (Subtarget->useIndirectThunkCalls()) |
1055 | report_fatal_error( |
1056 | reason: "Lowering patchpoint with thunks not yet implemented." ); |
1057 | EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg)); |
1058 | } |
1059 | |
1060 | // Emit padding. |
1061 | unsigned NumBytes = opers.getNumPatchBytes(); |
1062 | assert(NumBytes >= EncodedBytes && |
1063 | "Patchpoint can't request size less than the length of a call." ); |
1064 | |
1065 | emitX86Nops(OS&: *OutStreamer, NumBytes: NumBytes - EncodedBytes, Subtarget); |
1066 | } |
1067 | |
1068 | void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, |
1069 | X86MCInstLower &MCIL) { |
1070 | assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64" ); |
1071 | |
1072 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1073 | |
1074 | // We want to emit the following pattern, which follows the x86 calling |
1075 | // convention to prepare for the trampoline call to be patched in. |
1076 | // |
1077 | // .p2align 1, ... |
1078 | // .Lxray_event_sled_N: |
1079 | // jmp +N // jump across the instrumentation sled |
1080 | // ... // set up arguments in register |
1081 | // callq __xray_CustomEvent@plt // force dependency to symbol |
1082 | // ... |
1083 | // <jump here> |
1084 | // |
1085 | // After patching, it would look something like: |
1086 | // |
1087 | // nopw (2-byte nop) |
1088 | // ... |
1089 | // callq __xrayCustomEvent // already lowered |
1090 | // ... |
1091 | // |
1092 | // --- |
1093 | // First we emit the label and the jump. |
1094 | auto CurSled = OutContext.createTempSymbol(Name: "xray_event_sled_" , AlwaysAddSuffix: true); |
1095 | OutStreamer->AddComment(T: "# XRay Custom Event Log" ); |
1096 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1097 | OutStreamer->emitLabel(Symbol: CurSled); |
1098 | |
1099 | // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as |
1100 | // an operand (computed as an offset from the jmp instruction). |
1101 | // FIXME: Find another less hacky way do force the relative jump. |
1102 | OutStreamer->emitBinaryData(Data: "\xeb\x0f" ); |
1103 | |
1104 | // The default C calling convention will place two arguments into %rcx and |
1105 | // %rdx -- so we only work with those. |
1106 | const Register DestRegs[] = {X86::RDI, X86::RSI}; |
1107 | bool UsedMask[] = {false, false}; |
1108 | // Filled out in loop. |
1109 | Register SrcRegs[] = {0, 0}; |
1110 | |
1111 | // Then we put the operands in the %rdi and %rsi registers. We spill the |
1112 | // values in the register before we clobber them, and mark them as used in |
1113 | // UsedMask. In case the arguments are already in the correct register, we use |
1114 | // emit nops appropriately sized to keep the sled the same size in every |
1115 | // situation. |
1116 | for (unsigned I = 0; I < MI.getNumOperands(); ++I) |
1117 | if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO: MI.getOperand(i: I))) { |
1118 | assert(Op->isReg() && "Only support arguments in registers" ); |
1119 | SrcRegs[I] = getX86SubSuperRegister(Reg: Op->getReg(), Size: 64); |
1120 | assert(SrcRegs[I].isValid() && "Invalid operand" ); |
1121 | if (SrcRegs[I] != DestRegs[I]) { |
1122 | UsedMask[I] = true; |
1123 | EmitAndCountInstruction( |
1124 | MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I])); |
1125 | } else { |
1126 | emitX86Nops(OS&: *OutStreamer, NumBytes: 4, Subtarget); |
1127 | } |
1128 | } |
1129 | |
1130 | // Now that the register values are stashed, mov arguments into place. |
1131 | // FIXME: This doesn't work if one of the later SrcRegs is equal to an |
1132 | // earlier DestReg. We will have already overwritten over the register before |
1133 | // we can copy from it. |
1134 | for (unsigned I = 0; I < MI.getNumOperands(); ++I) |
1135 | if (SrcRegs[I] != DestRegs[I]) |
1136 | EmitAndCountInstruction( |
1137 | MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I])); |
1138 | |
1139 | // We emit a hard dependency on the __xray_CustomEvent symbol, which is the |
1140 | // name of the trampoline to be implemented by the XRay runtime. |
1141 | auto TSym = OutContext.getOrCreateSymbol(Name: "__xray_CustomEvent" ); |
1142 | MachineOperand TOp = MachineOperand::CreateMCSymbol(Sym: TSym); |
1143 | if (isPositionIndependent()) |
1144 | TOp.setTargetFlags(X86II::MO_PLT); |
1145 | |
1146 | // Emit the call instruction. |
1147 | EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32) |
1148 | .addOperand(MCIL.LowerSymbolOperand(TOp, TSym))); |
1149 | |
1150 | // Restore caller-saved and used registers. |
1151 | for (unsigned I = sizeof UsedMask; I-- > 0;) |
1152 | if (UsedMask[I]) |
1153 | EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I])); |
1154 | else |
1155 | emitX86Nops(OS&: *OutStreamer, NumBytes: 1, Subtarget); |
1156 | |
1157 | OutStreamer->AddComment(T: "xray custom event end." ); |
1158 | |
1159 | // Record the sled version. Version 0 of this sled was spelled differently, so |
1160 | // we let the runtime handle the different offsets we're using. Version 2 |
1161 | // changed the absolute address to a PC-relative address. |
1162 | recordSled(Sled: CurSled, MI, Kind: SledKind::CUSTOM_EVENT, Version: 2); |
1163 | } |
1164 | |
1165 | void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, |
1166 | X86MCInstLower &MCIL) { |
1167 | assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64" ); |
1168 | |
1169 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1170 | |
1171 | // We want to emit the following pattern, which follows the x86 calling |
1172 | // convention to prepare for the trampoline call to be patched in. |
1173 | // |
1174 | // .p2align 1, ... |
1175 | // .Lxray_event_sled_N: |
1176 | // jmp +N // jump across the instrumentation sled |
1177 | // ... // set up arguments in register |
1178 | // callq __xray_TypedEvent@plt // force dependency to symbol |
1179 | // ... |
1180 | // <jump here> |
1181 | // |
1182 | // After patching, it would look something like: |
1183 | // |
1184 | // nopw (2-byte nop) |
1185 | // ... |
1186 | // callq __xrayTypedEvent // already lowered |
1187 | // ... |
1188 | // |
1189 | // --- |
1190 | // First we emit the label and the jump. |
1191 | auto CurSled = OutContext.createTempSymbol(Name: "xray_typed_event_sled_" , AlwaysAddSuffix: true); |
1192 | OutStreamer->AddComment(T: "# XRay Typed Event Log" ); |
1193 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1194 | OutStreamer->emitLabel(Symbol: CurSled); |
1195 | |
1196 | // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as |
1197 | // an operand (computed as an offset from the jmp instruction). |
1198 | // FIXME: Find another less hacky way do force the relative jump. |
1199 | OutStreamer->emitBinaryData(Data: "\xeb\x14" ); |
1200 | |
1201 | // An x86-64 convention may place three arguments into %rcx, %rdx, and R8, |
1202 | // so we'll work with those. Or we may be called via SystemV, in which case |
1203 | // we don't have to do any translation. |
1204 | const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX}; |
1205 | bool UsedMask[] = {false, false, false}; |
1206 | |
1207 | // Will fill out src regs in the loop. |
1208 | Register SrcRegs[] = {0, 0, 0}; |
1209 | |
1210 | // Then we put the operands in the SystemV registers. We spill the values in |
1211 | // the registers before we clobber them, and mark them as used in UsedMask. |
1212 | // In case the arguments are already in the correct register, we emit nops |
1213 | // appropriately sized to keep the sled the same size in every situation. |
1214 | for (unsigned I = 0; I < MI.getNumOperands(); ++I) |
1215 | if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO: MI.getOperand(i: I))) { |
1216 | // TODO: Is register only support adequate? |
1217 | assert(Op->isReg() && "Only supports arguments in registers" ); |
1218 | SrcRegs[I] = getX86SubSuperRegister(Reg: Op->getReg(), Size: 64); |
1219 | assert(SrcRegs[I].isValid() && "Invalid operand" ); |
1220 | if (SrcRegs[I] != DestRegs[I]) { |
1221 | UsedMask[I] = true; |
1222 | EmitAndCountInstruction( |
1223 | MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I])); |
1224 | } else { |
1225 | emitX86Nops(OS&: *OutStreamer, NumBytes: 4, Subtarget); |
1226 | } |
1227 | } |
1228 | |
1229 | // In the above loop we only stash all of the destination registers or emit |
1230 | // nops if the arguments are already in the right place. Doing the actually |
1231 | // moving is postponed until after all the registers are stashed so nothing |
1232 | // is clobbers. We've already added nops to account for the size of mov and |
1233 | // push if the register is in the right place, so we only have to worry about |
1234 | // emitting movs. |
1235 | // FIXME: This doesn't work if one of the later SrcRegs is equal to an |
1236 | // earlier DestReg. We will have already overwritten over the register before |
1237 | // we can copy from it. |
1238 | for (unsigned I = 0; I < MI.getNumOperands(); ++I) |
1239 | if (UsedMask[I]) |
1240 | EmitAndCountInstruction( |
1241 | MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I])); |
1242 | |
1243 | // We emit a hard dependency on the __xray_TypedEvent symbol, which is the |
1244 | // name of the trampoline to be implemented by the XRay runtime. |
1245 | auto TSym = OutContext.getOrCreateSymbol(Name: "__xray_TypedEvent" ); |
1246 | MachineOperand TOp = MachineOperand::CreateMCSymbol(Sym: TSym); |
1247 | if (isPositionIndependent()) |
1248 | TOp.setTargetFlags(X86II::MO_PLT); |
1249 | |
1250 | // Emit the call instruction. |
1251 | EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32) |
1252 | .addOperand(MCIL.LowerSymbolOperand(TOp, TSym))); |
1253 | |
1254 | // Restore caller-saved and used registers. |
1255 | for (unsigned I = sizeof UsedMask; I-- > 0;) |
1256 | if (UsedMask[I]) |
1257 | EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I])); |
1258 | else |
1259 | emitX86Nops(OS&: *OutStreamer, NumBytes: 1, Subtarget); |
1260 | |
1261 | OutStreamer->AddComment(T: "xray typed event end." ); |
1262 | |
1263 | // Record the sled version. |
1264 | recordSled(Sled: CurSled, MI, Kind: SledKind::TYPED_EVENT, Version: 2); |
1265 | } |
1266 | |
1267 | void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, |
1268 | X86MCInstLower &MCIL) { |
1269 | |
1270 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1271 | |
1272 | const Function &F = MF->getFunction(); |
1273 | if (F.hasFnAttribute(Kind: "patchable-function-entry" )) { |
1274 | unsigned Num; |
1275 | if (F.getFnAttribute(Kind: "patchable-function-entry" ) |
1276 | .getValueAsString() |
1277 | .getAsInteger(Radix: 10, Result&: Num)) |
1278 | return; |
1279 | emitX86Nops(OS&: *OutStreamer, NumBytes: Num, Subtarget); |
1280 | return; |
1281 | } |
1282 | // We want to emit the following pattern: |
1283 | // |
1284 | // .p2align 1, ... |
1285 | // .Lxray_sled_N: |
1286 | // jmp .tmpN |
1287 | // # 9 bytes worth of noops |
1288 | // |
1289 | // We need the 9 bytes because at runtime, we'd be patching over the full 11 |
1290 | // bytes with the following pattern: |
1291 | // |
1292 | // mov %r10, <function id, 32-bit> // 6 bytes |
1293 | // call <relative offset, 32-bits> // 5 bytes |
1294 | // |
1295 | auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_" , AlwaysAddSuffix: true); |
1296 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1297 | OutStreamer->emitLabel(Symbol: CurSled); |
1298 | |
1299 | // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as |
1300 | // an operand (computed as an offset from the jmp instruction). |
1301 | // FIXME: Find another less hacky way do force the relative jump. |
1302 | OutStreamer->emitBytes(Data: "\xeb\x09" ); |
1303 | emitX86Nops(OS&: *OutStreamer, NumBytes: 9, Subtarget); |
1304 | recordSled(Sled: CurSled, MI, Kind: SledKind::FUNCTION_ENTER, Version: 2); |
1305 | } |
1306 | |
1307 | void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, |
1308 | X86MCInstLower &MCIL) { |
1309 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1310 | |
1311 | // Since PATCHABLE_RET takes the opcode of the return statement as an |
1312 | // argument, we use that to emit the correct form of the RET that we want. |
1313 | // i.e. when we see this: |
1314 | // |
1315 | // PATCHABLE_RET X86::RET ... |
1316 | // |
1317 | // We should emit the RET followed by sleds. |
1318 | // |
1319 | // .p2align 1, ... |
1320 | // .Lxray_sled_N: |
1321 | // ret # or equivalent instruction |
1322 | // # 10 bytes worth of noops |
1323 | // |
1324 | // This just makes sure that the alignment for the next instruction is 2. |
1325 | auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_" , AlwaysAddSuffix: true); |
1326 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1327 | OutStreamer->emitLabel(Symbol: CurSled); |
1328 | unsigned OpCode = MI.getOperand(i: 0).getImm(); |
1329 | MCInst Ret; |
1330 | Ret.setOpcode(OpCode); |
1331 | for (auto &MO : drop_begin(RangeOrContainer: MI.operands())) |
1332 | if (auto MaybeOperand = MCIL.LowerMachineOperand(MI: &MI, MO)) |
1333 | Ret.addOperand(Op: *MaybeOperand); |
1334 | OutStreamer->emitInstruction(Inst: Ret, STI: getSubtargetInfo()); |
1335 | emitX86Nops(OS&: *OutStreamer, NumBytes: 10, Subtarget); |
1336 | recordSled(Sled: CurSled, MI, Kind: SledKind::FUNCTION_EXIT, Version: 2); |
1337 | } |
1338 | |
1339 | void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, |
1340 | X86MCInstLower &MCIL) { |
1341 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1342 | |
1343 | // Like PATCHABLE_RET, we have the actual instruction in the operands to this |
1344 | // instruction so we lower that particular instruction and its operands. |
1345 | // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how |
1346 | // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to |
1347 | // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual |
1348 | // tail call much like how we have it in PATCHABLE_RET. |
1349 | auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_" , AlwaysAddSuffix: true); |
1350 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1351 | OutStreamer->emitLabel(Symbol: CurSled); |
1352 | auto Target = OutContext.createTempSymbol(); |
1353 | |
1354 | // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as |
1355 | // an operand (computed as an offset from the jmp instruction). |
1356 | // FIXME: Find another less hacky way do force the relative jump. |
1357 | OutStreamer->emitBytes(Data: "\xeb\x09" ); |
1358 | emitX86Nops(OS&: *OutStreamer, NumBytes: 9, Subtarget); |
1359 | OutStreamer->emitLabel(Symbol: Target); |
1360 | recordSled(Sled: CurSled, MI, Kind: SledKind::TAIL_CALL, Version: 2); |
1361 | |
1362 | unsigned OpCode = MI.getOperand(i: 0).getImm(); |
1363 | OpCode = convertTailJumpOpcode(Opcode: OpCode); |
1364 | MCInst TC; |
1365 | TC.setOpcode(OpCode); |
1366 | |
1367 | // Before emitting the instruction, add a comment to indicate that this is |
1368 | // indeed a tail call. |
1369 | OutStreamer->AddComment(T: "TAILCALL" ); |
1370 | for (auto &MO : drop_begin(RangeOrContainer: MI.operands())) |
1371 | if (auto MaybeOperand = MCIL.LowerMachineOperand(MI: &MI, MO)) |
1372 | TC.addOperand(Op: *MaybeOperand); |
1373 | OutStreamer->emitInstruction(Inst: TC, STI: getSubtargetInfo()); |
1374 | } |
1375 | |
1376 | // Returns instruction preceding MBBI in MachineFunction. |
1377 | // If MBBI is the first instruction of the first basic block, returns null. |
1378 | static MachineBasicBlock::const_iterator |
1379 | PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) { |
1380 | const MachineBasicBlock *MBB = MBBI->getParent(); |
1381 | while (MBBI == MBB->begin()) { |
1382 | if (MBB == &MBB->getParent()->front()) |
1383 | return MachineBasicBlock::const_iterator(); |
1384 | MBB = MBB->getPrevNode(); |
1385 | MBBI = MBB->end(); |
1386 | } |
1387 | --MBBI; |
1388 | return MBBI; |
1389 | } |
1390 | |
1391 | static unsigned getSrcIdx(const MachineInstr* MI, unsigned SrcIdx) { |
1392 | if (X86II::isKMasked(TSFlags: MI->getDesc().TSFlags)) { |
1393 | // Skip mask operand. |
1394 | ++SrcIdx; |
1395 | if (X86II::isKMergeMasked(TSFlags: MI->getDesc().TSFlags)) { |
1396 | // Skip passthru operand. |
1397 | ++SrcIdx; |
1398 | } |
1399 | } |
1400 | return SrcIdx; |
1401 | } |
1402 | |
1403 | static void printDstRegisterName(raw_ostream &CS, const MachineInstr *MI, |
1404 | unsigned SrcOpIdx) { |
1405 | const MachineOperand &DstOp = MI->getOperand(i: 0); |
1406 | CS << X86ATTInstPrinter::getRegisterName(Reg: DstOp.getReg()); |
1407 | |
1408 | // Handle AVX512 MASK/MASXZ write mask comments. |
1409 | // MASK: zmmX {%kY} |
1410 | // MASKZ: zmmX {%kY} {z} |
1411 | if (X86II::isKMasked(TSFlags: MI->getDesc().TSFlags)) { |
1412 | const MachineOperand &WriteMaskOp = MI->getOperand(i: SrcOpIdx - 1); |
1413 | StringRef Mask = X86ATTInstPrinter::getRegisterName(Reg: WriteMaskOp.getReg()); |
1414 | CS << " {%" << Mask << "}" ; |
1415 | if (!X86II::isKMergeMasked(TSFlags: MI->getDesc().TSFlags)) { |
1416 | CS << " {z}" ; |
1417 | } |
1418 | } |
1419 | } |
1420 | |
1421 | static void printShuffleMask(raw_ostream &CS, StringRef Src1Name, |
1422 | StringRef Src2Name, ArrayRef<int> Mask) { |
1423 | // One source operand, fix the mask to print all elements in one span. |
1424 | SmallVector<int, 8> ShuffleMask(Mask); |
1425 | if (Src1Name == Src2Name) |
1426 | for (int i = 0, e = ShuffleMask.size(); i != e; ++i) |
1427 | if (ShuffleMask[i] >= e) |
1428 | ShuffleMask[i] -= e; |
1429 | |
1430 | for (int i = 0, e = ShuffleMask.size(); i != e; ++i) { |
1431 | if (i != 0) |
1432 | CS << "," ; |
1433 | if (ShuffleMask[i] == SM_SentinelZero) { |
1434 | CS << "zero" ; |
1435 | continue; |
1436 | } |
1437 | |
1438 | // Otherwise, it must come from src1 or src2. Print the span of elements |
1439 | // that comes from this src. |
1440 | bool isSrc1 = ShuffleMask[i] < (int)e; |
1441 | CS << (isSrc1 ? Src1Name : Src2Name) << '['; |
1442 | |
1443 | bool IsFirst = true; |
1444 | while (i != e && ShuffleMask[i] != SM_SentinelZero && |
1445 | (ShuffleMask[i] < (int)e) == isSrc1) { |
1446 | if (!IsFirst) |
1447 | CS << ','; |
1448 | else |
1449 | IsFirst = false; |
1450 | if (ShuffleMask[i] == SM_SentinelUndef) |
1451 | CS << "u" ; |
1452 | else |
1453 | CS << ShuffleMask[i] % (int)e; |
1454 | ++i; |
1455 | } |
1456 | CS << ']'; |
1457 | --i; // For loop increments element #. |
1458 | } |
1459 | } |
1460 | |
1461 | static std::string (const MachineInstr *MI, unsigned SrcOp1Idx, |
1462 | unsigned SrcOp2Idx, ArrayRef<int> Mask) { |
1463 | std::string ; |
1464 | |
1465 | const MachineOperand &SrcOp1 = MI->getOperand(i: SrcOp1Idx); |
1466 | const MachineOperand &SrcOp2 = MI->getOperand(i: SrcOp2Idx); |
1467 | StringRef Src1Name = SrcOp1.isReg() |
1468 | ? X86ATTInstPrinter::getRegisterName(Reg: SrcOp1.getReg()) |
1469 | : "mem" ; |
1470 | StringRef Src2Name = SrcOp2.isReg() |
1471 | ? X86ATTInstPrinter::getRegisterName(Reg: SrcOp2.getReg()) |
1472 | : "mem" ; |
1473 | |
1474 | raw_string_ostream CS(Comment); |
1475 | printDstRegisterName(CS, MI, SrcOpIdx: SrcOp1Idx); |
1476 | CS << " = " ; |
1477 | printShuffleMask(CS, Src1Name, Src2Name, Mask); |
1478 | CS.flush(); |
1479 | |
1480 | return Comment; |
1481 | } |
1482 | |
1483 | static void printConstant(const APInt &Val, raw_ostream &CS, |
1484 | bool PrintZero = false) { |
1485 | if (Val.getBitWidth() <= 64) { |
1486 | CS << (PrintZero ? 0ULL : Val.getZExtValue()); |
1487 | } else { |
1488 | // print multi-word constant as (w0,w1) |
1489 | CS << "(" ; |
1490 | for (int i = 0, N = Val.getNumWords(); i < N; ++i) { |
1491 | if (i > 0) |
1492 | CS << "," ; |
1493 | CS << (PrintZero ? 0ULL : Val.getRawData()[i]); |
1494 | } |
1495 | CS << ")" ; |
1496 | } |
1497 | } |
1498 | |
1499 | static void printConstant(const APFloat &Flt, raw_ostream &CS, |
1500 | bool PrintZero = false) { |
1501 | SmallString<32> Str; |
1502 | // Force scientific notation to distinguish from integers. |
1503 | if (PrintZero) |
1504 | APFloat::getZero(Sem: Flt.getSemantics()).toString(Str, FormatPrecision: 0, FormatMaxPadding: 0); |
1505 | else |
1506 | Flt.toString(Str, FormatPrecision: 0, FormatMaxPadding: 0); |
1507 | CS << Str; |
1508 | } |
1509 | |
1510 | static void printConstant(const Constant *COp, unsigned BitWidth, |
1511 | raw_ostream &CS, bool PrintZero = false) { |
1512 | if (isa<UndefValue>(Val: COp)) { |
1513 | CS << "u" ; |
1514 | } else if (auto *CI = dyn_cast<ConstantInt>(Val: COp)) { |
1515 | printConstant(Val: CI->getValue(), CS, PrintZero); |
1516 | } else if (auto *CF = dyn_cast<ConstantFP>(Val: COp)) { |
1517 | printConstant(Flt: CF->getValueAPF(), CS, PrintZero); |
1518 | } else if (auto *CDS = dyn_cast<ConstantDataSequential>(Val: COp)) { |
1519 | Type *EltTy = CDS->getElementType(); |
1520 | bool IsInteger = EltTy->isIntegerTy(); |
1521 | bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy(); |
1522 | unsigned EltBits = EltTy->getPrimitiveSizeInBits(); |
1523 | unsigned E = std::min(a: BitWidth / EltBits, b: CDS->getNumElements()); |
1524 | assert((BitWidth % EltBits) == 0 && "Element size mismatch" ); |
1525 | for (unsigned I = 0; I != E; ++I) { |
1526 | if (I != 0) |
1527 | CS << "," ; |
1528 | if (IsInteger) |
1529 | printConstant(Val: CDS->getElementAsAPInt(i: I), CS, PrintZero); |
1530 | else if (IsFP) |
1531 | printConstant(Flt: CDS->getElementAsAPFloat(i: I), CS, PrintZero); |
1532 | else |
1533 | CS << "?" ; |
1534 | } |
1535 | } else if (auto *CV = dyn_cast<ConstantVector>(Val: COp)) { |
1536 | unsigned EltBits = CV->getType()->getScalarSizeInBits(); |
1537 | unsigned E = std::min(a: BitWidth / EltBits, b: CV->getNumOperands()); |
1538 | assert((BitWidth % EltBits) == 0 && "Element size mismatch" ); |
1539 | for (unsigned I = 0; I != E; ++I) { |
1540 | if (I != 0) |
1541 | CS << "," ; |
1542 | printConstant(COp: CV->getOperand(i_nocapture: I), BitWidth: EltBits, CS, PrintZero); |
1543 | } |
1544 | } else { |
1545 | CS << "?" ; |
1546 | } |
1547 | } |
1548 | |
1549 | static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer, |
1550 | int SclWidth, int VecWidth, |
1551 | const char *) { |
1552 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1553 | |
1554 | std::string ; |
1555 | raw_string_ostream CS(Comment); |
1556 | printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx); |
1557 | CS << " = " ; |
1558 | |
1559 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx)) { |
1560 | CS << "[" ; |
1561 | printConstant(COp: C, BitWidth: SclWidth, CS); |
1562 | for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) { |
1563 | CS << "," ; |
1564 | printConstant(COp: C, BitWidth: SclWidth, CS, PrintZero: true); |
1565 | } |
1566 | CS << "]" ; |
1567 | OutStreamer.AddComment(T: CS.str()); |
1568 | return; // early-out |
1569 | } |
1570 | |
1571 | // We didn't find a constant load, fallback to a shuffle mask decode. |
1572 | CS << ShuffleComment; |
1573 | OutStreamer.AddComment(T: CS.str()); |
1574 | } |
1575 | |
1576 | static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer, |
1577 | int Repeats, int BitWidth) { |
1578 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1579 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx)) { |
1580 | std::string ; |
1581 | raw_string_ostream CS(Comment); |
1582 | printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx); |
1583 | CS << " = [" ; |
1584 | for (int l = 0; l != Repeats; ++l) { |
1585 | if (l != 0) |
1586 | CS << "," ; |
1587 | printConstant(COp: C, BitWidth, CS); |
1588 | } |
1589 | CS << "]" ; |
1590 | OutStreamer.AddComment(T: CS.str()); |
1591 | } |
1592 | } |
1593 | |
1594 | static bool printExtend(const MachineInstr *MI, MCStreamer &OutStreamer, |
1595 | int SrcEltBits, int DstEltBits, bool IsSext) { |
1596 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1597 | auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx); |
1598 | if (C && C->getType()->getScalarSizeInBits() == unsigned(SrcEltBits)) { |
1599 | if (auto *CDS = dyn_cast<ConstantDataSequential>(Val: C)) { |
1600 | int NumElts = CDS->getNumElements(); |
1601 | std::string ; |
1602 | raw_string_ostream CS(Comment); |
1603 | printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx); |
1604 | CS << " = [" ; |
1605 | for (int i = 0; i != NumElts; ++i) { |
1606 | if (i != 0) |
1607 | CS << "," ; |
1608 | if (CDS->getElementType()->isIntegerTy()) { |
1609 | APInt Elt = CDS->getElementAsAPInt(i); |
1610 | Elt = IsSext ? Elt.sext(width: DstEltBits) : Elt.zext(width: DstEltBits); |
1611 | printConstant(Val: Elt, CS); |
1612 | } else |
1613 | CS << "?" ; |
1614 | } |
1615 | CS << "]" ; |
1616 | OutStreamer.AddComment(T: CS.str()); |
1617 | return true; |
1618 | } |
1619 | } |
1620 | |
1621 | return false; |
1622 | } |
1623 | static void printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer, |
1624 | int SrcEltBits, int DstEltBits) { |
1625 | printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, IsSext: true); |
1626 | } |
1627 | static void printZeroExtend(const MachineInstr *MI, MCStreamer &OutStreamer, |
1628 | int SrcEltBits, int DstEltBits) { |
1629 | if (printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, IsSext: false)) |
1630 | return; |
1631 | |
1632 | // We didn't find a constant load, fallback to a shuffle mask decode. |
1633 | std::string ; |
1634 | raw_string_ostream CS(Comment); |
1635 | printDstRegisterName(CS, MI, SrcOpIdx: getSrcIdx(MI, SrcIdx: 1)); |
1636 | CS << " = " ; |
1637 | |
1638 | SmallVector<int> Mask; |
1639 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1640 | assert((Width % DstEltBits) == 0 && (DstEltBits % SrcEltBits) == 0 && |
1641 | "Illegal extension ratio" ); |
1642 | DecodeZeroExtendMask(SrcScalarBits: SrcEltBits, DstScalarBits: DstEltBits, NumDstElts: Width / DstEltBits, IsAnyExtend: false, ShuffleMask&: Mask); |
1643 | printShuffleMask(CS, Src1Name: "mem" , Src2Name: "" , Mask); |
1644 | |
1645 | OutStreamer.AddComment(T: CS.str()); |
1646 | } |
1647 | |
1648 | void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { |
1649 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?" ); |
1650 | assert((getSubtarget().isOSWindows() || TM.getTargetTriple().isUEFI()) && |
1651 | "SEH_ instruction Windows and UEFI only" ); |
1652 | |
1653 | // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86. |
1654 | if (EmitFPOData) { |
1655 | X86TargetStreamer *XTS = |
1656 | static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()); |
1657 | switch (MI->getOpcode()) { |
1658 | case X86::SEH_PushReg: |
1659 | XTS->emitFPOPushReg(Reg: MI->getOperand(i: 0).getImm()); |
1660 | break; |
1661 | case X86::SEH_StackAlloc: |
1662 | XTS->emitFPOStackAlloc(StackAlloc: MI->getOperand(i: 0).getImm()); |
1663 | break; |
1664 | case X86::SEH_StackAlign: |
1665 | XTS->emitFPOStackAlign(Align: MI->getOperand(i: 0).getImm()); |
1666 | break; |
1667 | case X86::SEH_SetFrame: |
1668 | assert(MI->getOperand(1).getImm() == 0 && |
1669 | ".cv_fpo_setframe takes no offset" ); |
1670 | XTS->emitFPOSetFrame(Reg: MI->getOperand(i: 0).getImm()); |
1671 | break; |
1672 | case X86::SEH_EndPrologue: |
1673 | XTS->emitFPOEndPrologue(); |
1674 | break; |
1675 | case X86::SEH_SaveReg: |
1676 | case X86::SEH_SaveXMM: |
1677 | case X86::SEH_PushFrame: |
1678 | llvm_unreachable("SEH_ directive incompatible with FPO" ); |
1679 | break; |
1680 | default: |
1681 | llvm_unreachable("expected SEH_ instruction" ); |
1682 | } |
1683 | return; |
1684 | } |
1685 | |
1686 | // Otherwise, use the .seh_ directives for all other Windows platforms. |
1687 | switch (MI->getOpcode()) { |
1688 | case X86::SEH_PushReg: |
1689 | OutStreamer->emitWinCFIPushReg(Register: MI->getOperand(i: 0).getImm()); |
1690 | break; |
1691 | |
1692 | case X86::SEH_SaveReg: |
1693 | OutStreamer->emitWinCFISaveReg(Register: MI->getOperand(i: 0).getImm(), |
1694 | Offset: MI->getOperand(i: 1).getImm()); |
1695 | break; |
1696 | |
1697 | case X86::SEH_SaveXMM: |
1698 | OutStreamer->emitWinCFISaveXMM(Register: MI->getOperand(i: 0).getImm(), |
1699 | Offset: MI->getOperand(i: 1).getImm()); |
1700 | break; |
1701 | |
1702 | case X86::SEH_StackAlloc: |
1703 | OutStreamer->emitWinCFIAllocStack(Size: MI->getOperand(i: 0).getImm()); |
1704 | break; |
1705 | |
1706 | case X86::SEH_SetFrame: |
1707 | OutStreamer->emitWinCFISetFrame(Register: MI->getOperand(i: 0).getImm(), |
1708 | Offset: MI->getOperand(i: 1).getImm()); |
1709 | break; |
1710 | |
1711 | case X86::SEH_PushFrame: |
1712 | OutStreamer->emitWinCFIPushFrame(Code: MI->getOperand(i: 0).getImm()); |
1713 | break; |
1714 | |
1715 | case X86::SEH_EndPrologue: |
1716 | OutStreamer->emitWinCFIEndProlog(); |
1717 | break; |
1718 | |
1719 | default: |
1720 | llvm_unreachable("expected SEH_ instruction" ); |
1721 | } |
1722 | } |
1723 | |
1724 | static void (const MachineInstr *MI, |
1725 | MCStreamer &OutStreamer) { |
1726 | switch (MI->getOpcode()) { |
1727 | // Lower PSHUFB and VPERMILP normally but add a comment if we can find |
1728 | // a constant shuffle mask. We won't be able to do this at the MC layer |
1729 | // because the mask isn't an immediate. |
1730 | case X86::PSHUFBrm: |
1731 | case X86::VPSHUFBrm: |
1732 | case X86::VPSHUFBYrm: |
1733 | case X86::VPSHUFBZ128rm: |
1734 | case X86::VPSHUFBZ128rmk: |
1735 | case X86::VPSHUFBZ128rmkz: |
1736 | case X86::VPSHUFBZ256rm: |
1737 | case X86::VPSHUFBZ256rmk: |
1738 | case X86::VPSHUFBZ256rmkz: |
1739 | case X86::VPSHUFBZrm: |
1740 | case X86::VPSHUFBZrmk: |
1741 | case X86::VPSHUFBZrmkz: { |
1742 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1743 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1744 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1745 | SmallVector<int, 64> Mask; |
1746 | DecodePSHUFBMask(C, Width, ShuffleMask&: Mask); |
1747 | if (!Mask.empty()) |
1748 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask)); |
1749 | } |
1750 | break; |
1751 | } |
1752 | |
1753 | case X86::VPERMILPSrm: |
1754 | case X86::VPERMILPSYrm: |
1755 | case X86::VPERMILPSZ128rm: |
1756 | case X86::VPERMILPSZ128rmk: |
1757 | case X86::VPERMILPSZ128rmkz: |
1758 | case X86::VPERMILPSZ256rm: |
1759 | case X86::VPERMILPSZ256rmk: |
1760 | case X86::VPERMILPSZ256rmkz: |
1761 | case X86::VPERMILPSZrm: |
1762 | case X86::VPERMILPSZrmk: |
1763 | case X86::VPERMILPSZrmkz: { |
1764 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1765 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1766 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1767 | SmallVector<int, 16> Mask; |
1768 | DecodeVPERMILPMask(C, ElSize: 32, Width, ShuffleMask&: Mask); |
1769 | if (!Mask.empty()) |
1770 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask)); |
1771 | } |
1772 | break; |
1773 | } |
1774 | case X86::VPERMILPDrm: |
1775 | case X86::VPERMILPDYrm: |
1776 | case X86::VPERMILPDZ128rm: |
1777 | case X86::VPERMILPDZ128rmk: |
1778 | case X86::VPERMILPDZ128rmkz: |
1779 | case X86::VPERMILPDZ256rm: |
1780 | case X86::VPERMILPDZ256rmk: |
1781 | case X86::VPERMILPDZ256rmkz: |
1782 | case X86::VPERMILPDZrm: |
1783 | case X86::VPERMILPDZrmk: |
1784 | case X86::VPERMILPDZrmkz: { |
1785 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1786 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1787 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1788 | SmallVector<int, 16> Mask; |
1789 | DecodeVPERMILPMask(C, ElSize: 64, Width, ShuffleMask&: Mask); |
1790 | if (!Mask.empty()) |
1791 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask)); |
1792 | } |
1793 | break; |
1794 | } |
1795 | |
1796 | case X86::VPERMIL2PDrm: |
1797 | case X86::VPERMIL2PSrm: |
1798 | case X86::VPERMIL2PDYrm: |
1799 | case X86::VPERMIL2PSYrm: { |
1800 | assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) && |
1801 | "Unexpected number of operands!" ); |
1802 | |
1803 | const MachineOperand &CtrlOp = MI->getOperand(i: MI->getNumOperands() - 1); |
1804 | if (!CtrlOp.isImm()) |
1805 | break; |
1806 | |
1807 | unsigned ElSize; |
1808 | switch (MI->getOpcode()) { |
1809 | default: llvm_unreachable("Invalid opcode" ); |
1810 | case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break; |
1811 | case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break; |
1812 | } |
1813 | |
1814 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: 3)) { |
1815 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1816 | SmallVector<int, 16> Mask; |
1817 | DecodeVPERMIL2PMask(C, M2Z: (unsigned)CtrlOp.getImm(), ElSize, Width, ShuffleMask&: Mask); |
1818 | if (!Mask.empty()) |
1819 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: 1, SrcOp2Idx: 2, Mask)); |
1820 | } |
1821 | break; |
1822 | } |
1823 | |
1824 | case X86::VPPERMrrm: { |
1825 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: 3)) { |
1826 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1827 | SmallVector<int, 16> Mask; |
1828 | DecodeVPPERMMask(C, Width, ShuffleMask&: Mask); |
1829 | if (!Mask.empty()) |
1830 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: 1, SrcOp2Idx: 2, Mask)); |
1831 | } |
1832 | break; |
1833 | } |
1834 | |
1835 | case X86::MMX_MOVQ64rm: { |
1836 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: 1)) { |
1837 | std::string ; |
1838 | raw_string_ostream CS(Comment); |
1839 | const MachineOperand &DstOp = MI->getOperand(i: 0); |
1840 | CS << X86ATTInstPrinter::getRegisterName(Reg: DstOp.getReg()) << " = " ; |
1841 | if (auto *CF = dyn_cast<ConstantFP>(Val: C)) { |
1842 | CS << "0x" << toString(I: CF->getValueAPF().bitcastToAPInt(), Radix: 16, Signed: false); |
1843 | OutStreamer.AddComment(T: CS.str()); |
1844 | } |
1845 | } |
1846 | break; |
1847 | } |
1848 | |
1849 | #define MASK_AVX512_CASE(Instr) \ |
1850 | case Instr: \ |
1851 | case Instr##k: \ |
1852 | case Instr##kz: |
1853 | |
1854 | case X86::MOVSDrm: |
1855 | case X86::VMOVSDrm: |
1856 | MASK_AVX512_CASE(X86::VMOVSDZrm) |
1857 | case X86::MOVSDrm_alt: |
1858 | case X86::VMOVSDrm_alt: |
1859 | case X86::VMOVSDZrm_alt: |
1860 | case X86::MOVQI2PQIrm: |
1861 | case X86::VMOVQI2PQIrm: |
1862 | case X86::VMOVQI2PQIZrm: |
1863 | printZeroUpperMove(MI, OutStreamer, SclWidth: 64, VecWidth: 128, ShuffleComment: "mem[0],zero" ); |
1864 | break; |
1865 | |
1866 | MASK_AVX512_CASE(X86::VMOVSHZrm) |
1867 | case X86::VMOVSHZrm_alt: |
1868 | printZeroUpperMove(MI, OutStreamer, SclWidth: 16, VecWidth: 128, |
1869 | ShuffleComment: "mem[0],zero,zero,zero,zero,zero,zero,zero" ); |
1870 | break; |
1871 | |
1872 | case X86::MOVSSrm: |
1873 | case X86::VMOVSSrm: |
1874 | MASK_AVX512_CASE(X86::VMOVSSZrm) |
1875 | case X86::MOVSSrm_alt: |
1876 | case X86::VMOVSSrm_alt: |
1877 | case X86::VMOVSSZrm_alt: |
1878 | case X86::MOVDI2PDIrm: |
1879 | case X86::VMOVDI2PDIrm: |
1880 | case X86::VMOVDI2PDIZrm: |
1881 | printZeroUpperMove(MI, OutStreamer, SclWidth: 32, VecWidth: 128, ShuffleComment: "mem[0],zero,zero,zero" ); |
1882 | break; |
1883 | |
1884 | #define MOV_CASE(Prefix, Suffix) \ |
1885 | case X86::Prefix##MOVAPD##Suffix##rm: \ |
1886 | case X86::Prefix##MOVAPS##Suffix##rm: \ |
1887 | case X86::Prefix##MOVUPD##Suffix##rm: \ |
1888 | case X86::Prefix##MOVUPS##Suffix##rm: \ |
1889 | case X86::Prefix##MOVDQA##Suffix##rm: \ |
1890 | case X86::Prefix##MOVDQU##Suffix##rm: |
1891 | |
1892 | #define MOV_AVX512_CASE(Suffix, Postfix) \ |
1893 | case X86::VMOVDQA64##Suffix##rm##Postfix: \ |
1894 | case X86::VMOVDQA32##Suffix##rm##Postfix: \ |
1895 | case X86::VMOVDQU64##Suffix##rm##Postfix: \ |
1896 | case X86::VMOVDQU32##Suffix##rm##Postfix: \ |
1897 | case X86::VMOVDQU16##Suffix##rm##Postfix: \ |
1898 | case X86::VMOVDQU8##Suffix##rm##Postfix: \ |
1899 | case X86::VMOVAPS##Suffix##rm##Postfix: \ |
1900 | case X86::VMOVAPD##Suffix##rm##Postfix: \ |
1901 | case X86::VMOVUPS##Suffix##rm##Postfix: \ |
1902 | case X86::VMOVUPD##Suffix##rm##Postfix: |
1903 | |
1904 | #define CASE_128_MOV_RM() \ |
1905 | MOV_CASE(, ) /* SSE */ \ |
1906 | MOV_CASE(V, ) /* AVX-128 */ \ |
1907 | MOV_AVX512_CASE(Z128, ) \ |
1908 | MOV_AVX512_CASE(Z128, k) \ |
1909 | MOV_AVX512_CASE(Z128, kz) |
1910 | |
1911 | #define CASE_256_MOV_RM() \ |
1912 | MOV_CASE(V, Y) /* AVX-256 */ \ |
1913 | MOV_AVX512_CASE(Z256, ) \ |
1914 | MOV_AVX512_CASE(Z256, k) \ |
1915 | MOV_AVX512_CASE(Z256, kz) \ |
1916 | |
1917 | #define CASE_512_MOV_RM() \ |
1918 | MOV_AVX512_CASE(Z, ) \ |
1919 | MOV_AVX512_CASE(Z, k) \ |
1920 | MOV_AVX512_CASE(Z, kz) \ |
1921 | |
1922 | // For loads from a constant pool to a vector register, print the constant |
1923 | // loaded. |
1924 | CASE_128_MOV_RM() |
1925 | printBroadcast(MI, OutStreamer, Repeats: 1, BitWidth: 128); |
1926 | break; |
1927 | CASE_256_MOV_RM() |
1928 | printBroadcast(MI, OutStreamer, Repeats: 1, BitWidth: 256); |
1929 | break; |
1930 | CASE_512_MOV_RM() |
1931 | printBroadcast(MI, OutStreamer, Repeats: 1, BitWidth: 512); |
1932 | break; |
1933 | case X86::VBROADCASTF128rm: |
1934 | case X86::VBROADCASTI128rm: |
1935 | MASK_AVX512_CASE(X86::VBROADCASTF32X4Z256rm) |
1936 | MASK_AVX512_CASE(X86::VBROADCASTF64X2Z128rm) |
1937 | MASK_AVX512_CASE(X86::VBROADCASTI32X4Z256rm) |
1938 | MASK_AVX512_CASE(X86::VBROADCASTI64X2Z128rm) |
1939 | printBroadcast(MI, OutStreamer, Repeats: 2, BitWidth: 128); |
1940 | break; |
1941 | MASK_AVX512_CASE(X86::VBROADCASTF32X4rm) |
1942 | MASK_AVX512_CASE(X86::VBROADCASTF64X2rm) |
1943 | MASK_AVX512_CASE(X86::VBROADCASTI32X4rm) |
1944 | MASK_AVX512_CASE(X86::VBROADCASTI64X2rm) |
1945 | printBroadcast(MI, OutStreamer, Repeats: 4, BitWidth: 128); |
1946 | break; |
1947 | MASK_AVX512_CASE(X86::VBROADCASTF32X8rm) |
1948 | MASK_AVX512_CASE(X86::VBROADCASTF64X4rm) |
1949 | MASK_AVX512_CASE(X86::VBROADCASTI32X8rm) |
1950 | MASK_AVX512_CASE(X86::VBROADCASTI64X4rm) |
1951 | printBroadcast(MI, OutStreamer, Repeats: 2, BitWidth: 256); |
1952 | break; |
1953 | |
1954 | // For broadcast loads from a constant pool to a vector register, repeatedly |
1955 | // print the constant loaded. |
1956 | case X86::MOVDDUPrm: |
1957 | case X86::VMOVDDUPrm: |
1958 | MASK_AVX512_CASE(X86::VMOVDDUPZ128rm) |
1959 | case X86::VPBROADCASTQrm: |
1960 | MASK_AVX512_CASE(X86::VPBROADCASTQZ128rm) |
1961 | printBroadcast(MI, OutStreamer, Repeats: 2, BitWidth: 64); |
1962 | break; |
1963 | case X86::VBROADCASTSDYrm: |
1964 | MASK_AVX512_CASE(X86::VBROADCASTSDZ256rm) |
1965 | case X86::VPBROADCASTQYrm: |
1966 | MASK_AVX512_CASE(X86::VPBROADCASTQZ256rm) |
1967 | printBroadcast(MI, OutStreamer, Repeats: 4, BitWidth: 64); |
1968 | break; |
1969 | MASK_AVX512_CASE(X86::VBROADCASTSDZrm) |
1970 | MASK_AVX512_CASE(X86::VPBROADCASTQZrm) |
1971 | printBroadcast(MI, OutStreamer, Repeats: 8, BitWidth: 64); |
1972 | break; |
1973 | case X86::VBROADCASTSSrm: |
1974 | MASK_AVX512_CASE(X86::VBROADCASTSSZ128rm) |
1975 | case X86::VPBROADCASTDrm: |
1976 | MASK_AVX512_CASE(X86::VPBROADCASTDZ128rm) |
1977 | printBroadcast(MI, OutStreamer, Repeats: 4, BitWidth: 32); |
1978 | break; |
1979 | case X86::VBROADCASTSSYrm: |
1980 | MASK_AVX512_CASE(X86::VBROADCASTSSZ256rm) |
1981 | case X86::VPBROADCASTDYrm: |
1982 | MASK_AVX512_CASE(X86::VPBROADCASTDZ256rm) |
1983 | printBroadcast(MI, OutStreamer, Repeats: 8, BitWidth: 32); |
1984 | break; |
1985 | MASK_AVX512_CASE(X86::VBROADCASTSSZrm) |
1986 | MASK_AVX512_CASE(X86::VPBROADCASTDZrm) |
1987 | printBroadcast(MI, OutStreamer, Repeats: 16, BitWidth: 32); |
1988 | break; |
1989 | case X86::VPBROADCASTWrm: |
1990 | MASK_AVX512_CASE(X86::VPBROADCASTWZ128rm) |
1991 | printBroadcast(MI, OutStreamer, Repeats: 8, BitWidth: 16); |
1992 | break; |
1993 | case X86::VPBROADCASTWYrm: |
1994 | MASK_AVX512_CASE(X86::VPBROADCASTWZ256rm) |
1995 | printBroadcast(MI, OutStreamer, Repeats: 16, BitWidth: 16); |
1996 | break; |
1997 | MASK_AVX512_CASE(X86::VPBROADCASTWZrm) |
1998 | printBroadcast(MI, OutStreamer, Repeats: 32, BitWidth: 16); |
1999 | break; |
2000 | case X86::VPBROADCASTBrm: |
2001 | MASK_AVX512_CASE(X86::VPBROADCASTBZ128rm) |
2002 | printBroadcast(MI, OutStreamer, Repeats: 16, BitWidth: 8); |
2003 | break; |
2004 | case X86::VPBROADCASTBYrm: |
2005 | MASK_AVX512_CASE(X86::VPBROADCASTBZ256rm) |
2006 | printBroadcast(MI, OutStreamer, Repeats: 32, BitWidth: 8); |
2007 | break; |
2008 | MASK_AVX512_CASE(X86::VPBROADCASTBZrm) |
2009 | printBroadcast(MI, OutStreamer, Repeats: 64, BitWidth: 8); |
2010 | break; |
2011 | |
2012 | #define MOVX_CASE(Prefix, Ext, Type, Suffix, Postfix) \ |
2013 | case X86::Prefix##PMOV##Ext##Type##Suffix##rm##Postfix: |
2014 | |
2015 | #define CASE_MOVX_RM(Ext, Type) \ |
2016 | MOVX_CASE(, Ext, Type, , ) \ |
2017 | MOVX_CASE(V, Ext, Type, , ) \ |
2018 | MOVX_CASE(V, Ext, Type, Y, ) \ |
2019 | MOVX_CASE(V, Ext, Type, Z128, ) \ |
2020 | MOVX_CASE(V, Ext, Type, Z128, k ) \ |
2021 | MOVX_CASE(V, Ext, Type, Z128, kz ) \ |
2022 | MOVX_CASE(V, Ext, Type, Z256, ) \ |
2023 | MOVX_CASE(V, Ext, Type, Z256, k ) \ |
2024 | MOVX_CASE(V, Ext, Type, Z256, kz ) \ |
2025 | MOVX_CASE(V, Ext, Type, Z, ) \ |
2026 | MOVX_CASE(V, Ext, Type, Z, k ) \ |
2027 | MOVX_CASE(V, Ext, Type, Z, kz ) |
2028 | |
2029 | CASE_MOVX_RM(SX, BD) |
2030 | printSignExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 32); |
2031 | break; |
2032 | CASE_MOVX_RM(SX, BQ) |
2033 | printSignExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 64); |
2034 | break; |
2035 | CASE_MOVX_RM(SX, BW) |
2036 | printSignExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 16); |
2037 | break; |
2038 | CASE_MOVX_RM(SX, DQ) |
2039 | printSignExtend(MI, OutStreamer, SrcEltBits: 32, DstEltBits: 64); |
2040 | break; |
2041 | CASE_MOVX_RM(SX, WD) |
2042 | printSignExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 32); |
2043 | break; |
2044 | CASE_MOVX_RM(SX, WQ) |
2045 | printSignExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 64); |
2046 | break; |
2047 | |
2048 | CASE_MOVX_RM(ZX, BD) |
2049 | printZeroExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 32); |
2050 | break; |
2051 | CASE_MOVX_RM(ZX, BQ) |
2052 | printZeroExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 64); |
2053 | break; |
2054 | CASE_MOVX_RM(ZX, BW) |
2055 | printZeroExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 16); |
2056 | break; |
2057 | CASE_MOVX_RM(ZX, DQ) |
2058 | printZeroExtend(MI, OutStreamer, SrcEltBits: 32, DstEltBits: 64); |
2059 | break; |
2060 | CASE_MOVX_RM(ZX, WD) |
2061 | printZeroExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 32); |
2062 | break; |
2063 | CASE_MOVX_RM(ZX, WQ) |
2064 | printZeroExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 64); |
2065 | break; |
2066 | } |
2067 | } |
2068 | |
2069 | void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { |
2070 | // FIXME: Enable feature predicate checks once all the test pass. |
2071 | // X86_MC::verifyInstructionPredicates(MI->getOpcode(), |
2072 | // Subtarget->getFeatureBits()); |
2073 | |
2074 | X86MCInstLower MCInstLowering(*MF, *this); |
2075 | const X86RegisterInfo *RI = |
2076 | MF->getSubtarget<X86Subtarget>().getRegisterInfo(); |
2077 | |
2078 | if (MI->getOpcode() == X86::OR64rm) { |
2079 | for (auto &Opd : MI->operands()) { |
2080 | if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) == |
2081 | "swift_async_extendedFramePointerFlags" ) { |
2082 | ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true; |
2083 | } |
2084 | } |
2085 | } |
2086 | |
2087 | // Add comments for values loaded from constant pool. |
2088 | if (OutStreamer->isVerboseAsm()) |
2089 | addConstantComments(MI, OutStreamer&: *OutStreamer); |
2090 | |
2091 | // Add a comment about EVEX compression |
2092 | if (TM.Options.MCOptions.ShowMCEncoding) { |
2093 | if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY) |
2094 | OutStreamer->AddComment(T: "EVEX TO LEGACY Compression " , EOL: false); |
2095 | else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX) |
2096 | OutStreamer->AddComment(T: "EVEX TO VEX Compression " , EOL: false); |
2097 | else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_EVEX) |
2098 | OutStreamer->AddComment(T: "EVEX TO EVEX Compression " , EOL: false); |
2099 | } |
2100 | |
2101 | switch (MI->getOpcode()) { |
2102 | case TargetOpcode::DBG_VALUE: |
2103 | llvm_unreachable("Should be handled target independently" ); |
2104 | |
2105 | case X86::EH_RETURN: |
2106 | case X86::EH_RETURN64: { |
2107 | // Lower these as normal, but add some comments. |
2108 | Register Reg = MI->getOperand(i: 0).getReg(); |
2109 | OutStreamer->AddComment(T: StringRef("eh_return, addr: %" ) + |
2110 | X86ATTInstPrinter::getRegisterName(Reg)); |
2111 | break; |
2112 | } |
2113 | case X86::CLEANUPRET: { |
2114 | // Lower these as normal, but add some comments. |
2115 | OutStreamer->AddComment(T: "CLEANUPRET" ); |
2116 | break; |
2117 | } |
2118 | |
2119 | case X86::CATCHRET: { |
2120 | // Lower these as normal, but add some comments. |
2121 | OutStreamer->AddComment(T: "CATCHRET" ); |
2122 | break; |
2123 | } |
2124 | |
2125 | case X86::ENDBR32: |
2126 | case X86::ENDBR64: { |
2127 | // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for |
2128 | // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be |
2129 | // non-empty. If MI is the initial ENDBR, place the |
2130 | // __patchable_function_entries label after ENDBR. |
2131 | if (CurrentPatchableFunctionEntrySym && |
2132 | CurrentPatchableFunctionEntrySym == CurrentFnBegin && |
2133 | MI == &MF->front().front()) { |
2134 | MCInst Inst; |
2135 | MCInstLowering.Lower(MI, OutMI&: Inst); |
2136 | EmitAndCountInstruction(Inst); |
2137 | CurrentPatchableFunctionEntrySym = createTempSymbol(Name: "patch" ); |
2138 | OutStreamer->emitLabel(Symbol: CurrentPatchableFunctionEntrySym); |
2139 | return; |
2140 | } |
2141 | break; |
2142 | } |
2143 | |
2144 | case X86::TAILJMPd64: |
2145 | if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11)) |
2146 | EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX)); |
2147 | [[fallthrough]]; |
2148 | case X86::TAILJMPr: |
2149 | case X86::TAILJMPm: |
2150 | case X86::TAILJMPd: |
2151 | case X86::TAILJMPd_CC: |
2152 | case X86::TAILJMPr64: |
2153 | case X86::TAILJMPm64: |
2154 | case X86::TAILJMPd64_CC: |
2155 | case X86::TAILJMPr64_REX: |
2156 | case X86::TAILJMPm64_REX: |
2157 | // Lower these as normal, but add some comments. |
2158 | OutStreamer->AddComment(T: "TAILCALL" ); |
2159 | break; |
2160 | |
2161 | case X86::TLS_addr32: |
2162 | case X86::TLS_addr64: |
2163 | case X86::TLS_addrX32: |
2164 | case X86::TLS_base_addr32: |
2165 | case X86::TLS_base_addr64: |
2166 | case X86::TLS_base_addrX32: |
2167 | return LowerTlsAddr(MCInstLowering, MI: *MI); |
2168 | |
2169 | case X86::MOVPC32r: { |
2170 | // This is a pseudo op for a two instruction sequence with a label, which |
2171 | // looks like: |
2172 | // call "L1$pb" |
2173 | // "L1$pb": |
2174 | // popl %esi |
2175 | |
2176 | // Emit the call. |
2177 | MCSymbol *PICBase = MF->getPICBaseSymbol(); |
2178 | // FIXME: We would like an efficient form for this, so we don't have to do a |
2179 | // lot of extra uniquing. |
2180 | EmitAndCountInstruction( |
2181 | MCInstBuilder(X86::CALLpcrel32) |
2182 | .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); |
2183 | |
2184 | const X86FrameLowering *FrameLowering = |
2185 | MF->getSubtarget<X86Subtarget>().getFrameLowering(); |
2186 | bool hasFP = FrameLowering->hasFP(MF: *MF); |
2187 | |
2188 | // TODO: This is needed only if we require precise CFA. |
2189 | bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() && |
2190 | !OutStreamer->getDwarfFrameInfos().back().End; |
2191 | |
2192 | int stackGrowth = -RI->getSlotSize(); |
2193 | |
2194 | if (HasActiveDwarfFrame && !hasFP) { |
2195 | OutStreamer->emitCFIAdjustCfaOffset(Adjustment: -stackGrowth); |
2196 | MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true); |
2197 | } |
2198 | |
2199 | // Emit the label. |
2200 | OutStreamer->emitLabel(Symbol: PICBase); |
2201 | |
2202 | // popl $reg |
2203 | EmitAndCountInstruction( |
2204 | MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg())); |
2205 | |
2206 | if (HasActiveDwarfFrame && !hasFP) { |
2207 | OutStreamer->emitCFIAdjustCfaOffset(Adjustment: stackGrowth); |
2208 | } |
2209 | return; |
2210 | } |
2211 | |
2212 | case X86::ADD32ri: { |
2213 | // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. |
2214 | if (MI->getOperand(i: 2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) |
2215 | break; |
2216 | |
2217 | // Okay, we have something like: |
2218 | // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) |
2219 | |
2220 | // For this, we want to print something like: |
2221 | // MYGLOBAL + (. - PICBASE) |
2222 | // However, we can't generate a ".", so just emit a new label here and refer |
2223 | // to it. |
2224 | MCSymbol *DotSym = OutContext.createTempSymbol(); |
2225 | OutStreamer->emitLabel(Symbol: DotSym); |
2226 | |
2227 | // Now that we have emitted the label, lower the complex operand expression. |
2228 | MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MO: MI->getOperand(i: 2)); |
2229 | |
2230 | const MCExpr *DotExpr = MCSymbolRefExpr::create(Symbol: DotSym, Ctx&: OutContext); |
2231 | const MCExpr *PICBase = |
2232 | MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx&: OutContext); |
2233 | DotExpr = MCBinaryExpr::createSub(LHS: DotExpr, RHS: PICBase, Ctx&: OutContext); |
2234 | |
2235 | DotExpr = MCBinaryExpr::createAdd( |
2236 | LHS: MCSymbolRefExpr::create(Symbol: OpSym, Ctx&: OutContext), RHS: DotExpr, Ctx&: OutContext); |
2237 | |
2238 | EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri) |
2239 | .addReg(MI->getOperand(0).getReg()) |
2240 | .addReg(MI->getOperand(1).getReg()) |
2241 | .addExpr(DotExpr)); |
2242 | return; |
2243 | } |
2244 | case TargetOpcode::STATEPOINT: |
2245 | return LowerSTATEPOINT(MI: *MI, MCIL&: MCInstLowering); |
2246 | |
2247 | case TargetOpcode::FAULTING_OP: |
2248 | return LowerFAULTING_OP(FaultingMI: *MI, MCIL&: MCInstLowering); |
2249 | |
2250 | case TargetOpcode::FENTRY_CALL: |
2251 | return LowerFENTRY_CALL(MI: *MI, MCIL&: MCInstLowering); |
2252 | |
2253 | case TargetOpcode::PATCHABLE_OP: |
2254 | return LowerPATCHABLE_OP(MI: *MI, MCIL&: MCInstLowering); |
2255 | |
2256 | case TargetOpcode::STACKMAP: |
2257 | return LowerSTACKMAP(MI: *MI); |
2258 | |
2259 | case TargetOpcode::PATCHPOINT: |
2260 | return LowerPATCHPOINT(MI: *MI, MCIL&: MCInstLowering); |
2261 | |
2262 | case TargetOpcode::PATCHABLE_FUNCTION_ENTER: |
2263 | return LowerPATCHABLE_FUNCTION_ENTER(MI: *MI, MCIL&: MCInstLowering); |
2264 | |
2265 | case TargetOpcode::PATCHABLE_RET: |
2266 | return LowerPATCHABLE_RET(MI: *MI, MCIL&: MCInstLowering); |
2267 | |
2268 | case TargetOpcode::PATCHABLE_TAIL_CALL: |
2269 | return LowerPATCHABLE_TAIL_CALL(MI: *MI, MCIL&: MCInstLowering); |
2270 | |
2271 | case TargetOpcode::PATCHABLE_EVENT_CALL: |
2272 | return LowerPATCHABLE_EVENT_CALL(MI: *MI, MCIL&: MCInstLowering); |
2273 | |
2274 | case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL: |
2275 | return LowerPATCHABLE_TYPED_EVENT_CALL(MI: *MI, MCIL&: MCInstLowering); |
2276 | |
2277 | case X86::MORESTACK_RET: |
2278 | EmitAndCountInstruction(Inst&: MCInstBuilder(getRetOpcode(Subtarget: *Subtarget))); |
2279 | return; |
2280 | |
2281 | case X86::KCFI_CHECK: |
2282 | return LowerKCFI_CHECK(MI: *MI); |
2283 | |
2284 | case X86::ASAN_CHECK_MEMACCESS: |
2285 | return LowerASAN_CHECK_MEMACCESS(MI: *MI); |
2286 | |
2287 | case X86::MORESTACK_RET_RESTORE_R10: |
2288 | // Return, then restore R10. |
2289 | EmitAndCountInstruction(Inst&: MCInstBuilder(getRetOpcode(Subtarget: *Subtarget))); |
2290 | EmitAndCountInstruction( |
2291 | MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX)); |
2292 | return; |
2293 | |
2294 | case X86::SEH_PushReg: |
2295 | case X86::SEH_SaveReg: |
2296 | case X86::SEH_SaveXMM: |
2297 | case X86::SEH_StackAlloc: |
2298 | case X86::SEH_StackAlign: |
2299 | case X86::SEH_SetFrame: |
2300 | case X86::SEH_PushFrame: |
2301 | case X86::SEH_EndPrologue: |
2302 | EmitSEHInstruction(MI); |
2303 | return; |
2304 | |
2305 | case X86::SEH_Epilogue: { |
2306 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?" ); |
2307 | MachineBasicBlock::const_iterator MBBI(MI); |
2308 | // Check if preceded by a call and emit nop if so. |
2309 | for (MBBI = PrevCrossBBInst(MBBI); |
2310 | MBBI != MachineBasicBlock::const_iterator(); |
2311 | MBBI = PrevCrossBBInst(MBBI)) { |
2312 | // Pseudo instructions that aren't a call are assumed to not emit any |
2313 | // code. If they do, we worst case generate unnecessary noops after a |
2314 | // call. |
2315 | if (MBBI->isCall() || !MBBI->isPseudo()) { |
2316 | if (MBBI->isCall()) |
2317 | EmitAndCountInstruction(MCInstBuilder(X86::NOOP)); |
2318 | break; |
2319 | } |
2320 | } |
2321 | return; |
2322 | } |
2323 | case X86::UBSAN_UD1: |
2324 | EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm) |
2325 | .addReg(X86::EAX) |
2326 | .addReg(X86::EAX) |
2327 | .addImm(1) |
2328 | .addReg(X86::NoRegister) |
2329 | .addImm(MI->getOperand(0).getImm()) |
2330 | .addReg(X86::NoRegister)); |
2331 | return; |
2332 | case X86::CALL64pcrel32: |
2333 | if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11)) |
2334 | EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX)); |
2335 | break; |
2336 | } |
2337 | |
2338 | MCInst TmpInst; |
2339 | MCInstLowering.Lower(MI, OutMI&: TmpInst); |
2340 | |
2341 | // Stackmap shadows cannot include branch targets, so we can count the bytes |
2342 | // in a call towards the shadow, but must ensure that the no thread returns |
2343 | // in to the stackmap shadow. The only way to achieve this is if the call |
2344 | // is at the end of the shadow. |
2345 | if (MI->isCall()) { |
2346 | // Count then size of the call towards the shadow |
2347 | SMShadowTracker.count(Inst&: TmpInst, STI: getSubtargetInfo(), CodeEmitter: CodeEmitter.get()); |
2348 | // Then flush the shadow so that we fill with nops before the call, not |
2349 | // after it. |
2350 | SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo()); |
2351 | // Then emit the call |
2352 | OutStreamer->emitInstruction(Inst: TmpInst, STI: getSubtargetInfo()); |
2353 | return; |
2354 | } |
2355 | |
2356 | EmitAndCountInstruction(Inst&: TmpInst); |
2357 | } |
2358 | |