1 | //==- X86IndirectThunks.cpp - Construct indirect call/jump thunks for x86 --=// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// |
10 | /// Pass that injects an MI thunk that is used to lower indirect calls in a way |
11 | /// that prevents speculation on some x86 processors and can be used to mitigate |
12 | /// security vulnerabilities due to targeted speculative execution and side |
13 | /// channels such as CVE-2017-5715. |
14 | /// |
15 | /// Currently supported thunks include: |
16 | /// - Retpoline -- A RET-implemented trampoline that lowers indirect calls |
17 | /// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization |
18 | /// before making an indirect call/jump |
19 | /// |
20 | /// Note that the reason that this is implemented as a MachineFunctionPass and |
21 | /// not a ModulePass is that ModulePasses at this point in the LLVM X86 pipeline |
22 | /// serialize all transformations, which can consume lots of memory. |
23 | /// |
24 | /// TODO(chandlerc): All of this code could use better comments and |
25 | /// documentation. |
26 | /// |
27 | //===----------------------------------------------------------------------===// |
28 | |
29 | #include "X86.h" |
30 | #include "X86InstrBuilder.h" |
31 | #include "X86Subtarget.h" |
32 | #include "llvm/CodeGen/IndirectThunks.h" |
33 | #include "llvm/CodeGen/MachineFunction.h" |
34 | #include "llvm/CodeGen/MachineFunctionPass.h" |
35 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
36 | #include "llvm/CodeGen/MachineModuleInfo.h" |
37 | #include "llvm/CodeGen/Passes.h" |
38 | #include "llvm/CodeGen/TargetPassConfig.h" |
39 | #include "llvm/IR/IRBuilder.h" |
40 | #include "llvm/IR/Instructions.h" |
41 | #include "llvm/IR/Module.h" |
42 | #include "llvm/Support/CommandLine.h" |
43 | #include "llvm/Support/Debug.h" |
44 | #include "llvm/Support/raw_ostream.h" |
45 | #include "llvm/Target/TargetMachine.h" |
46 | |
47 | using namespace llvm; |
48 | |
49 | #define DEBUG_TYPE "x86-retpoline-thunks" |
50 | |
// Symbol names of the thunks this pass can emit. The shared prefixes are what
// getThunkPrefix() returns so generated thunk bodies can be recognized by name.
static const char RetpolineNamePrefix[] = "__llvm_retpoline_";
static const char R11RetpolineName[] = "__llvm_retpoline_r11";
static const char EAXRetpolineName[] = "__llvm_retpoline_eax";
static const char ECXRetpolineName[] = "__llvm_retpoline_ecx";
static const char EDXRetpolineName[] = "__llvm_retpoline_edx";
static const char EDIRetpolineName[] = "__llvm_retpoline_edi";

static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_";
static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11";
60 | |
61 | namespace { |
62 | struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> { |
63 | const char *getThunkPrefix() { return RetpolineNamePrefix; } |
64 | bool mayUseThunk(const MachineFunction &MF, bool InsertedThunks) { |
65 | if (InsertedThunks) |
66 | return false; |
67 | const auto &STI = MF.getSubtarget<X86Subtarget>(); |
68 | return (STI.useRetpolineIndirectCalls() || |
69 | STI.useRetpolineIndirectBranches()) && |
70 | !STI.useRetpolineExternalThunk(); |
71 | } |
72 | bool insertThunks(MachineModuleInfo &MMI, MachineFunction &MF); |
73 | void populateThunk(MachineFunction &MF); |
74 | }; |
75 | |
76 | struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> { |
77 | const char *getThunkPrefix() { return LVIThunkNamePrefix; } |
78 | bool mayUseThunk(const MachineFunction &MF, bool InsertedThunks) { |
79 | if (InsertedThunks) |
80 | return false; |
81 | return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity(); |
82 | } |
83 | bool insertThunks(MachineModuleInfo &MMI, MachineFunction &MF) { |
84 | createThunkFunction(MMI, Name: R11LVIThunkName); |
85 | return true; |
86 | } |
87 | void populateThunk(MachineFunction &MF) { |
88 | assert (MF.size() == 1); |
89 | MachineBasicBlock *Entry = &MF.front(); |
90 | Entry->clear(); |
91 | |
92 | // This code mitigates LVI by replacing each indirect call/jump with a |
93 | // direct call/jump to a thunk that looks like: |
94 | // ``` |
95 | // lfence |
96 | // jmpq *%r11 |
97 | // ``` |
98 | // This ensures that if the value in register %r11 was loaded from memory, |
99 | // then the value in %r11 is (architecturally) correct prior to the jump. |
100 | const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); |
101 | BuildMI(&MF.front(), DebugLoc(), TII->get(X86::Opcode: LFENCE)); |
102 | BuildMI(&MF.front(), DebugLoc(), TII->get(X86::Opcode: JMP64r)).addReg(X86::R11); |
103 | MF.front().addLiveIn(X86::R11); |
104 | } |
105 | }; |
106 | |
107 | class X86IndirectThunks : public MachineFunctionPass { |
108 | public: |
109 | static char ID; |
110 | |
111 | X86IndirectThunks() : MachineFunctionPass(ID) {} |
112 | |
113 | StringRef getPassName() const override { return "X86 Indirect Thunks" ; } |
114 | |
115 | bool doInitialization(Module &M) override; |
116 | bool runOnMachineFunction(MachineFunction &MF) override; |
117 | |
118 | private: |
119 | std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs; |
120 | |
121 | template <typename... ThunkInserterT> |
122 | static void initTIs(Module &M, |
123 | std::tuple<ThunkInserterT...> &ThunkInserters) { |
124 | (..., std::get<ThunkInserterT>(ThunkInserters).init(M)); |
125 | } |
126 | template <typename... ThunkInserterT> |
127 | static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF, |
128 | std::tuple<ThunkInserterT...> &ThunkInserters) { |
129 | return (0 | ... | std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)); |
130 | } |
131 | }; |
132 | |
133 | } // end anonymous namespace |
134 | |
135 | bool RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI, |
136 | MachineFunction &MF) { |
137 | if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64) |
138 | createThunkFunction(MMI, Name: R11RetpolineName); |
139 | else |
140 | for (StringRef Name : {EAXRetpolineName, ECXRetpolineName, EDXRetpolineName, |
141 | EDIRetpolineName}) |
142 | createThunkFunction(MMI, Name); |
143 | return true; |
144 | } |
145 | |
146 | void RetpolineThunkInserter::populateThunk(MachineFunction &MF) { |
147 | bool Is64Bit = MF.getTarget().getTargetTriple().getArch() == Triple::x86_64; |
148 | Register ThunkReg; |
149 | if (Is64Bit) { |
150 | assert(MF.getName() == "__llvm_retpoline_r11" && |
151 | "Should only have an r11 thunk on 64-bit targets" ); |
152 | |
153 | // __llvm_retpoline_r11: |
154 | // callq .Lr11_call_target |
155 | // .Lr11_capture_spec: |
156 | // pause |
157 | // lfence |
158 | // jmp .Lr11_capture_spec |
159 | // .align 16 |
160 | // .Lr11_call_target: |
161 | // movq %r11, (%rsp) |
162 | // retq |
163 | ThunkReg = X86::R11; |
164 | } else { |
165 | // For 32-bit targets we need to emit a collection of thunks for various |
166 | // possible scratch registers as well as a fallback that uses EDI, which is |
167 | // normally callee saved. |
168 | // __llvm_retpoline_eax: |
169 | // calll .Leax_call_target |
170 | // .Leax_capture_spec: |
171 | // pause |
172 | // jmp .Leax_capture_spec |
173 | // .align 16 |
174 | // .Leax_call_target: |
175 | // movl %eax, (%esp) # Clobber return addr |
176 | // retl |
177 | // |
178 | // __llvm_retpoline_ecx: |
179 | // ... # Same setup |
180 | // movl %ecx, (%esp) |
181 | // retl |
182 | // |
183 | // __llvm_retpoline_edx: |
184 | // ... # Same setup |
185 | // movl %edx, (%esp) |
186 | // retl |
187 | // |
188 | // __llvm_retpoline_edi: |
189 | // ... # Same setup |
190 | // movl %edi, (%esp) |
191 | // retl |
192 | if (MF.getName() == EAXRetpolineName) |
193 | ThunkReg = X86::EAX; |
194 | else if (MF.getName() == ECXRetpolineName) |
195 | ThunkReg = X86::ECX; |
196 | else if (MF.getName() == EDXRetpolineName) |
197 | ThunkReg = X86::EDX; |
198 | else if (MF.getName() == EDIRetpolineName) |
199 | ThunkReg = X86::EDI; |
200 | else |
201 | llvm_unreachable("Invalid thunk name on x86-32!" ); |
202 | } |
203 | |
204 | const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); |
205 | assert (MF.size() == 1); |
206 | MachineBasicBlock *Entry = &MF.front(); |
207 | Entry->clear(); |
208 | |
209 | MachineBasicBlock *CaptureSpec = |
210 | MF.CreateMachineBasicBlock(BB: Entry->getBasicBlock()); |
211 | MachineBasicBlock *CallTarget = |
212 | MF.CreateMachineBasicBlock(BB: Entry->getBasicBlock()); |
213 | MCSymbol *TargetSym = MF.getContext().createTempSymbol(); |
214 | MF.push_back(MBB: CaptureSpec); |
215 | MF.push_back(MBB: CallTarget); |
216 | |
217 | const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; |
218 | const unsigned RetOpc = Is64Bit ? X86::RET64 : X86::RET32; |
219 | |
220 | Entry->addLiveIn(PhysReg: ThunkReg); |
221 | BuildMI(BB: Entry, MIMD: DebugLoc(), MCID: TII->get(Opcode: CallOpc)).addSym(Sym: TargetSym); |
222 | |
223 | // The MIR verifier thinks that the CALL in the entry block will fall through |
224 | // to CaptureSpec, so mark it as the successor. Technically, CaptureTarget is |
225 | // the successor, but the MIR verifier doesn't know how to cope with that. |
226 | Entry->addSuccessor(Succ: CaptureSpec); |
227 | |
228 | // In the capture loop for speculation, we want to stop the processor from |
229 | // speculating as fast as possible. On Intel processors, the PAUSE instruction |
230 | // will block speculation without consuming any execution resources. On AMD |
231 | // processors, the PAUSE instruction is (essentially) a nop, so we also use an |
232 | // LFENCE instruction which they have advised will stop speculation as well |
233 | // with minimal resource utilization. We still end the capture with a jump to |
234 | // form an infinite loop to fully guarantee that no matter what implementation |
235 | // of the x86 ISA, speculating this code path never escapes. |
236 | BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::Opcode: PAUSE)); |
237 | BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::Opcode: LFENCE)); |
238 | BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::Opcode: JMP_1)).addMBB(CaptureSpec); |
239 | CaptureSpec->setMachineBlockAddressTaken(); |
240 | CaptureSpec->addSuccessor(Succ: CaptureSpec); |
241 | |
242 | CallTarget->addLiveIn(PhysReg: ThunkReg); |
243 | CallTarget->setMachineBlockAddressTaken(); |
244 | CallTarget->setAlignment(Align(16)); |
245 | |
246 | // Insert return address clobber |
247 | const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr; |
248 | const Register SPReg = Is64Bit ? X86::RSP : X86::ESP; |
249 | addRegOffset(MIB: BuildMI(BB: CallTarget, MIMD: DebugLoc(), MCID: TII->get(Opcode: MovOpc)), Reg: SPReg, isKill: false, |
250 | Offset: 0) |
251 | .addReg(RegNo: ThunkReg); |
252 | |
253 | CallTarget->back().setPreInstrSymbol(MF, Symbol: TargetSym); |
254 | BuildMI(BB: CallTarget, MIMD: DebugLoc(), MCID: TII->get(Opcode: RetOpc)); |
255 | } |
256 | |
/// Factory used by the X86 target to add this pass to its pipeline.
FunctionPass *llvm::createX86IndirectThunksPass() {
  return new X86IndirectThunks();
}

// Pass identification; the address of ID uniquely identifies this pass.
char X86IndirectThunks::ID = 0;
262 | |
263 | bool X86IndirectThunks::doInitialization(Module &M) { |
264 | initTIs(M, ThunkInserters&: TIs); |
265 | return false; |
266 | } |
267 | |
268 | bool X86IndirectThunks::runOnMachineFunction(MachineFunction &MF) { |
269 | LLVM_DEBUG(dbgs() << getPassName() << '\n'); |
270 | auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); |
271 | return runTIs(MMI, MF, ThunkInserters&: TIs); |
272 | } |
273 | |