1 | //===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU |
11 | /// instructions that produce single-use VGPR values. If the value is forwarded |
12 | /// to the consumer instruction prior to VGPR writeback, the hardware can |
13 | /// then skip (kill) the VGPR write. |
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #include "AMDGPU.h" |
18 | #include "GCNSubtarget.h" |
19 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
20 | #include "SIInstrInfo.h" |
21 | #include "llvm/ADT/DenseMap.h" |
22 | #include "llvm/ADT/STLExtras.h" |
23 | #include "llvm/ADT/StringRef.h" |
24 | #include "llvm/CodeGen/MachineBasicBlock.h" |
25 | #include "llvm/CodeGen/MachineFunction.h" |
26 | #include "llvm/CodeGen/MachineFunctionPass.h" |
27 | #include "llvm/CodeGen/MachineInstr.h" |
28 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
29 | #include "llvm/CodeGen/MachineOperand.h" |
30 | #include "llvm/CodeGen/Register.h" |
31 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
32 | #include "llvm/IR/DebugLoc.h" |
33 | #include "llvm/MC/MCRegister.h" |
34 | #include "llvm/Pass.h" |
35 | |
36 | using namespace llvm; |
37 | |
38 | #define DEBUG_TYPE "amdgpu-insert-single-use-vdst" |
39 | |
40 | namespace { |
41 | class AMDGPUInsertSingleUseVDST : public MachineFunctionPass { |
42 | private: |
43 | const SIInstrInfo *SII; |
44 | |
45 | public: |
46 | static char ID; |
47 | |
48 | AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {} |
49 | |
50 | void emitSingleUseVDST(MachineInstr &MI) const { |
51 | // Mark the following instruction as a single-use producer: |
52 | // s_singleuse_vdst { supr0: 1 } |
53 | BuildMI(*MI.getParent(), MI, DebugLoc(), SII->get(AMDGPU::S_SINGLEUSE_VDST)) |
54 | .addImm(0x1); |
55 | } |
56 | |
57 | bool runOnMachineFunction(MachineFunction &MF) override { |
58 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
59 | if (!ST.hasVGPRSingleUseHintInsts()) |
60 | return false; |
61 | |
62 | SII = ST.getInstrInfo(); |
63 | const auto *TRI = &SII->getRegisterInfo(); |
64 | bool InstructionEmitted = false; |
65 | |
66 | for (MachineBasicBlock &MBB : MF) { |
67 | DenseMap<MCPhysReg, unsigned> RegisterUseCount; // TODO: MCRegUnits |
68 | |
69 | // Handle boundaries at the end of basic block separately to avoid |
70 | // false positives. If they are live at the end of a basic block then |
71 | // assume it has more uses later on. |
72 | for (const auto &Liveouts : MBB.liveouts()) |
73 | RegisterUseCount[Liveouts.PhysReg] = 2; |
74 | |
75 | for (MachineInstr &MI : reverse(C: MBB.instrs())) { |
76 | // All registers in all operands need to be single use for an |
77 | // instruction to be marked as a single use producer. |
78 | bool AllProducerOperandsAreSingleUse = true; |
79 | |
80 | for (const auto &Operand : MI.operands()) { |
81 | if (!Operand.isReg()) |
82 | continue; |
83 | const auto Reg = Operand.getReg(); |
84 | |
85 | // Count the number of times each register is read. |
86 | if (Operand.readsReg()) |
87 | RegisterUseCount[Reg]++; |
88 | |
89 | // Do not attempt to optimise across exec mask changes. |
90 | if (MI.modifiesRegister(AMDGPU::Reg: EXEC, TRI)) { |
91 | for (auto &UsedReg : RegisterUseCount) |
92 | UsedReg.second = 2; |
93 | } |
94 | |
95 | // If we are at the point where the register first became live, |
96 | // check if the operands are single use. |
97 | if (!MI.modifiesRegister(Reg, TRI)) |
98 | continue; |
99 | if (RegisterUseCount[Reg] > 1) |
100 | AllProducerOperandsAreSingleUse = false; |
101 | // Reset uses count when a register is no longer live. |
102 | RegisterUseCount.erase(Val: Reg); |
103 | } |
104 | if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) { |
105 | // TODO: Replace with candidate logging for instruction grouping |
106 | // later. |
107 | emitSingleUseVDST(MI); |
108 | InstructionEmitted = true; |
109 | } |
110 | } |
111 | } |
112 | return InstructionEmitted; |
113 | } |
114 | }; |
115 | } // namespace |
116 | |
117 | char AMDGPUInsertSingleUseVDST::ID = 0; |
118 | |
119 | char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID; |
120 | |
121 | INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE, |
122 | "AMDGPU Insert SingleUseVDST" , false, false) |
123 | |