//===- bolt/Passes/ValidateInternalCalls.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ValidateInternalCalls class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/ValidateInternalCalls.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include "bolt/Passes/FrameAnalysis.h"
#include "llvm/MC/MCInstPrinter.h"
#include <optional>
#include <queue>

#define DEBUG_TYPE "bolt-internalcalls"

namespace llvm {
namespace bolt {

namespace {

// Helper used to extract the target basic block used in an internal call.
// Return nullptr if this is not an internal call target.
BinaryBasicBlock *getInternalCallTarget(BinaryFunction &Function,
                                        const MCInst &Inst) {
  const BinaryContext &BC = Function.getBinaryContext();
  if (!BC.MIB->isCall(Inst) || MCPlus::getNumPrimeOperands(Inst) != 1 ||
      !Inst.getOperand(0).isExpr())
    return nullptr;

  return Function.getBasicBlockForLabel(BC.MIB->getTargetSymbol(Inst));
}

// A special StackPointerTracking that considers internal calls
class StackPointerTrackingForInternalCalls
    : public StackPointerTrackingBase<StackPointerTrackingForInternalCalls> {
  friend class DataflowAnalysis<StackPointerTrackingForInternalCalls,
                                std::pair<int, int>>;

  std::optional<unsigned> AnnotationIndex;

protected:
  // We change the starting state to only consider the first block as an
  // entry point, otherwise the analysis won't converge (there will be two
  // valid stack offsets, one for an external call and another for an
  // internal call).
  std::pair<int, int> getStartingStateAtBB(const BinaryBasicBlock &BB) {
    if (&BB == &*Func.begin())
      return std::make_pair(-8, getEmpty());
    return std::make_pair(getEmpty(), getEmpty());
  }

  // Here we decrement SP for internal calls too, in addition to the regular
  // StackPointerTracking processing.
  std::pair<int, int> computeNext(const MCInst &Point,
                                  const std::pair<int, int> &Cur) {
    std::pair<int, int> Res = StackPointerTrackingBase<
        StackPointerTrackingForInternalCalls>::computeNext(Point, Cur);
    if (Res.first == StackPointerTracking::SUPERPOSITION ||
        Res.first == StackPointerTracking::EMPTY)
      return Res;

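    // A return pops the 8-byte return address off the stack, so the tracked
    // offset goes up by 8.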
    if (BC.MIB->isReturn(Point)) {
      Res.first += 8;
      return Res;
    }

    BinaryBasicBlock *Target = getInternalCallTarget(Func, Point);
    if (!Target)
      return Res;

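    // The internal call pushes an 8-byte return address, just like a regular
    // call, so account for it here.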
    Res.first -= 8;
    return Res;
  }

  StringRef getAnnotationName() const {
    return StringRef("StackPointerTrackingForInternalCalls");
  }

public:
  StackPointerTrackingForInternalCalls(BinaryFunction &BF)
      : StackPointerTrackingBase<StackPointerTrackingForInternalCalls>(BF) {}

  void run() {
    StackPointerTrackingBase<StackPointerTrackingForInternalCalls>::run();
  }
};

} // end anonymous namespace

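// Treat each internal call as an unconditional jump for CFG purposes: split
// the containing block right after the call, move the trailing instructions
// and the original successors to a new block, and make the call target the
// only successor of the block ending with the call.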
void ValidateInternalCalls::fixCFGForPIC(BinaryFunction &Function) const {
  std::queue<BinaryBasicBlock *> Work;
  for (BinaryBasicBlock &BB : Function)
    Work.emplace(&BB);

  while (!Work.empty()) {
    BinaryBasicBlock &BB = *Work.front();
    Work.pop();

    // Search for the next internal call.
    const BinaryBasicBlock::iterator InternalCall =
        llvm::find_if(BB, [&](const MCInst &Inst) {
          return getInternalCallTarget(Function, Inst) != nullptr;
        });

    // No internal call? Done with this block.
    if (InternalCall == BB.end())
      continue;

    BinaryBasicBlock *Target = getInternalCallTarget(Function, *InternalCall);
    InstructionListType MovedInsts = BB.splitInstructions(&*InternalCall);
    if (!MovedInsts.empty()) {
      // Split this block at the call instruction.
      std::unique_ptr<BinaryBasicBlock> NewBB = Function.createBasicBlock();
      NewBB->addInstructions(MovedInsts.begin(), MovedInsts.end());
      BB.moveAllSuccessorsTo(NewBB.get());

      Work.emplace(NewBB.get());
      std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
      NewBBs.emplace_back(std::move(NewBB));
      Function.insertBasicBlocks(&BB, std::move(NewBBs));
    }
    // Update successors
    BB.removeAllSuccessors();
    BB.addSuccessor(Target, BB.getExecutionCount(), 0ULL);
  }
}

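// Add CFG edges from blocks ending in a return back to the block that follows
// each internal call reaching that return, pairing calls and returns by their
// tracked stack-pointer offsets. Returns true if any new edge was added, so
// the caller can iterate this to a fixed point.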
bool ValidateInternalCalls::fixCFGForIC(BinaryFunction &Function) const {
  const BinaryContext &BC = Function.getBinaryContext();
  // Track SP value
  StackPointerTrackingForInternalCalls SPTIC(Function);
  SPTIC.run();

  // Track instructions reaching a given point of the CFG to answer
  // "There is a path from entry to point A that contains instruction B"
  ReachingInsns<false> RI(Function);
  RI.run();

  // We use the InsnToBB map that DataflowInfoManager provides us
  DataflowInfoManager Info(Function, nullptr, nullptr);

  bool Updated = false;

  auto processReturns = [&](BinaryBasicBlock &BB, MCInst &Return) {
    // Check all reaching internal calls
    for (auto I = RI.expr_begin(Return), E = RI.expr_end(); I != E; ++I) {
      MCInst &ReachingInst = **I;
      if (!getInternalCallTarget(Function, ReachingInst) ||
          BC.MIB->hasAnnotation(ReachingInst, getProcessedICTag()))
        continue;

      // Stack pointer matching
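      // Pair this RET only with calls at a matching stack level: right after
      // the call, SP must be exactly 8 bytes below its value right after the
      // RET, i.e. the RET pops the return address that the call pushed.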
      int SPAtCall = SPTIC.getStateAt(ReachingInst)->first;
      int SPAtRet = SPTIC.getStateAt(Return)->first;
      if (SPAtCall != StackPointerTracking::SUPERPOSITION &&
          SPAtRet != StackPointerTracking::SUPERPOSITION &&
          SPAtCall != SPAtRet - 8)
        continue;

      Updated = true;

      // Mark this call as processed, so we don't try to analyze it as a
      // PIC-computation internal call.
      BC.MIB->addAnnotation(ReachingInst, getProcessedICTag(), 0U);

      // Connect this block with the returning block of the caller
      BinaryBasicBlock *CallerBlock = Info.getInsnToBBMap()[&ReachingInst];
      BinaryBasicBlock *ReturnDestBlock =
          Function.getLayout().getBasicBlockAfter(CallerBlock);
      BB.addSuccessor(ReturnDestBlock, BB.getExecutionCount(), 0);
    }
  };

  // This will connect blocks terminated with RETs to their respective
  // internal caller return block. A note here: this is overly conservative
  // because in nested calls, or unrelated calls, it will create edges
  // connecting RETs to potentially unrelated internal calls. This is safe;
  // if it prevents us from recovering the stack offsets properly, we will
  // fail later.
  for (BinaryBasicBlock &BB : Function) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->isReturn(Inst))
        continue;

      processReturns(BB, Inst);
    }
  }
  return Updated;
}

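// Return true if the function contains any tail call. analyzeFunction() gives
// up on functions that mix tail calls with internal calls.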
bool ValidateInternalCalls::hasTailCallsInRange(
    BinaryFunction &Function) const {
  const BinaryContext &BC = Function.getBinaryContext();
  for (BinaryBasicBlock &BB : Function)
    for (MCInst &Inst : BB)
      if (BC.MIB->isTailCall(Inst))
        return true;
  return false;
}

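// Validate that every internal call in Function follows the PIC pattern this
// pass understands: the CFG is first rewritten via fixCFGForPIC() and
// fixCFGForIC(), then each call target must load the return address from the
// top of the stack, and every use of the loaded value must compute an address
// that stays within the function. Returns false otherwise.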
bool ValidateInternalCalls::analyzeFunction(BinaryFunction &Function) const {
  fixCFGForPIC(Function);
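  // Repeat until fixCFGForIC() stops adding new return edges.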
  while (fixCFGForIC(Function)) {
  }

  BinaryContext &BC = Function.getBinaryContext();
  RegAnalysis RA = RegAnalysis(BC, nullptr, nullptr);
  RA.setConservativeStrategy(RegAnalysis::ConservativeStrategy::CLOBBERS_NONE);
  bool HasTailCalls = hasTailCallsInRange(Function);

  for (BinaryBasicBlock &BB : Function) {
    for (MCInst &Inst : BB) {
      BinaryBasicBlock *Target = getInternalCallTarget(Function, Inst);
      if (!Target || BC.MIB->hasAnnotation(Inst, getProcessedICTag()))
        continue;

      if (HasTailCalls) {
        LLVM_DEBUG(dbgs() << Function
                          << " has tail calls and internal calls.\n");
        return false;
      }

      FrameIndexEntry FIE;
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      MCInst *TargetInst = ProgramPoint::getFirstPointAt(*Target).getInst();
      if (!BC.MIB->isStackAccess(*TargetInst, FIE.IsLoad, FIE.IsStore,
                                 FIE.IsStoreFromReg, Reg, SrcImm,
                                 FIE.StackPtrReg, StackOffset, FIE.Size,
                                 FIE.IsSimple, IsIndexed)) {
        LLVM_DEBUG({
          dbgs() << "Frame analysis failed - not simple: " << Function << "\n";
          Function.dump();
        });
        return false;
      }
      if (!FIE.IsLoad || FIE.StackPtrReg != BC.MIB->getStackPointer() ||
          StackOffset != 0) {
        LLVM_DEBUG({
          dbgs() << "Target instruction does not fetch return address - not "
                    "simple: "
                 << Function << "\n";
          Function.dump();
        });
        return false;
      }
      // Now track how the return address is used by tracking uses of Reg
      ReachingDefOrUse</*Def=*/false> RU =
          ReachingDefOrUse<false>(RA, Function, Reg);
      RU.run();

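      // In the usual get-PC idiom the internal call lands on the instruction
      // right after it, so the value loaded into Reg corresponds to the
      // offset of Target from the start of the function.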
      int64_t Offset = static_cast<int64_t>(Target->getInputOffset());
      bool UseDetected = false;
      for (auto I = RU.expr_begin(*RU.getStateBefore(*TargetInst)),
                E = RU.expr_end();
           I != E; ++I) {
        MCInst &Use = **I;
        BitVector UsedRegs = BitVector(BC.MRI->getNumRegs(), false);
        BC.MIB->getTouchedRegs(Use, UsedRegs);
        if (!UsedRegs[Reg])
          continue;
        UseDetected = true;
        int64_t Output;
        std::pair<MCPhysReg, int64_t> Input1 = std::make_pair(Reg, 0);
        std::pair<MCPhysReg, int64_t> Input2 = std::make_pair(0, 0);
        if (!BC.MIB->evaluateStackOffsetExpr(Use, Output, Input1, Input2)) {
          LLVM_DEBUG(dbgs() << "Evaluate stack offset expr failed.\n");
          return false;
        }
        if (Offset + Output < 0 ||
            Offset + Output > static_cast<int64_t>(Function.getSize())) {
          LLVM_DEBUG({
            dbgs() << "Detected out-of-range PIC reference in " << Function
                   << "\nReturn address load: ";
            BC.dump(*TargetInst);
            dbgs() << "Use: ";
            BC.dump(Use);
            Function.dump();
          });
          return false;
        }
        LLVM_DEBUG({
          dbgs() << "Validated access: ";
          BC.dump(Use);
        });
      }
      if (!UseDetected) {
        LLVM_DEBUG(dbgs() << "No use detected.\n");
        return false;
      }
    }
  }
  return true;
}

Error ValidateInternalCalls::runOnFunctions(BinaryContext &BC) {
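  // The internal-call idioms handled by this pass are x86-specific; skip
  // other targets.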
  if (!BC.isX86())
    return Error::success();

  // Look for functions that need validation. This should be pretty rare.
  std::set<BinaryFunction *> NeedsValidation;
  for (auto &BFI : BC.getBinaryFunctions()) {
    BinaryFunction &Function = BFI.second;
    for (BinaryBasicBlock &BB : Function) {
      for (MCInst &Inst : BB) {
        if (getInternalCallTarget(Function, Inst)) {
          NeedsValidation.insert(&Function);
          Function.setSimple(false);
          break;
        }
      }
    }
  }

  // Skip validation for non-relocation mode
  if (!BC.HasRelocations)
    return Error::success();

  // Since few functions need validation, we can work with our most expensive
  // algorithms here. Fix the CFG treating internal calls as unconditional
  // jumps. This optimistically assumes this call is a PIC trick to get the PC
  // value, so it is not really a call, but a jump. If we find that it's not
  // the case, we mark this function as non-simple and stop processing it.
  std::set<BinaryFunction *> Invalid;
  for (BinaryFunction *Function : NeedsValidation) {
    LLVM_DEBUG(dbgs() << "Validating " << *Function << "\n");
    if (!analyzeFunction(*Function))
      Invalid.insert(Function);
    clearAnnotations(*Function);
  }

  if (!Invalid.empty()) {
    BC.errs()
        << "BOLT-WARNING: will skip the following function(s) as unsupported"
           " internal calls were detected:\n";
    for (BinaryFunction *Function : Invalid) {
      BC.errs() << "  " << *Function << "\n";
      Function->setIgnored();
    }
  }
  return Error::success();
}

} // namespace bolt
} // namespace llvm

