1 | //===-- IRDynamicChecks.cpp -----------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/IR/Constants.h" |
10 | #include "llvm/IR/DataLayout.h" |
11 | #include "llvm/IR/Function.h" |
12 | #include "llvm/IR/Instructions.h" |
13 | #include "llvm/IR/Module.h" |
14 | #include "llvm/IR/Value.h" |
15 | #include "llvm/Support/raw_ostream.h" |
16 | |
17 | #include "IRDynamicChecks.h" |
18 | |
19 | #include "lldb/Expression/UtilityFunction.h" |
20 | #include "lldb/Target/ExecutionContext.h" |
21 | #include "lldb/Target/Process.h" |
22 | #include "lldb/Target/StackFrame.h" |
23 | #include "lldb/Target/Target.h" |
24 | #include "lldb/Utility/ConstString.h" |
25 | #include "lldb/Utility/LLDBLog.h" |
26 | #include "lldb/Utility/Log.h" |
27 | |
28 | #include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" |
29 | |
30 | using namespace llvm; |
31 | using namespace lldb_private; |
32 | |
/// Pass identifier handed to the ModulePass base-class constructor of
/// IRDynamicChecks.
static char ID;

/// Name of the utility function that validates a dereferenced pointer.
#define VALID_POINTER_CHECK_NAME "_$__lldb_valid_pointer_check"
/// Name of the utility function that validates an Objective-C message send.
#define VALID_OBJC_OBJECT_CHECK_NAME "$__lldb_objc_object_check"

/// Source text of the pointer checker.  The checker reads one byte through
/// the pointer it is given.  The function name is spliced in from
/// VALID_POINTER_CHECK_NAME so the definition and the name stay in sync.
static const char g_valid_pointer_check_text[] =
    "extern \"C\" void\n" VALID_POINTER_CHECK_NAME
    " (unsigned char *$__lldb_arg_ptr)\n"
    "{\n"
    " unsigned char $__lldb_local_val = *$__lldb_arg_ptr;\n"
    "}";
44 | |
45 | ClangDynamicCheckerFunctions::ClangDynamicCheckerFunctions() |
46 | : DynamicCheckerFunctions(DCF_Clang) {} |
47 | |
48 | ClangDynamicCheckerFunctions::~ClangDynamicCheckerFunctions() = default; |
49 | |
50 | llvm::Error ClangDynamicCheckerFunctions::Install( |
51 | DiagnosticManager &diagnostic_manager, ExecutionContext &exe_ctx) { |
52 | Expected<std::unique_ptr<UtilityFunction>> utility_fn = |
53 | exe_ctx.GetTargetRef().CreateUtilityFunction( |
54 | expression: g_valid_pointer_check_text, VALID_POINTER_CHECK_NAME, |
55 | language: lldb::eLanguageTypeC, exe_ctx); |
56 | if (!utility_fn) |
57 | return utility_fn.takeError(); |
58 | m_valid_pointer_check = std::move(*utility_fn); |
59 | |
60 | if (Process *process = exe_ctx.GetProcessPtr()) { |
61 | ObjCLanguageRuntime *objc_language_runtime = |
62 | ObjCLanguageRuntime::Get(process&: *process); |
63 | |
64 | if (objc_language_runtime) { |
65 | Expected<std::unique_ptr<UtilityFunction>> checker_fn = |
66 | objc_language_runtime->CreateObjectChecker(VALID_OBJC_OBJECT_CHECK_NAME, exe_ctx); |
67 | if (!checker_fn) |
68 | return checker_fn.takeError(); |
69 | m_objc_object_check = std::move(*checker_fn); |
70 | } |
71 | } |
72 | |
73 | return Error::success(); |
74 | } |
75 | |
76 | bool ClangDynamicCheckerFunctions::DoCheckersExplainStop(lldb::addr_t addr, |
77 | Stream &message) { |
78 | // FIXME: We have to get the checkers to know why they scotched the call in |
79 | // more detail, |
80 | // so we can print a better message here. |
81 | if (m_valid_pointer_check && m_valid_pointer_check->ContainsAddress(address: addr)) { |
82 | message.Printf(format: "Attempted to dereference an invalid pointer." ); |
83 | return true; |
84 | } else if (m_objc_object_check && |
85 | m_objc_object_check->ContainsAddress(address: addr)) { |
86 | message.Printf(format: "Attempted to dereference an invalid ObjC Object or send it " |
87 | "an unrecognized selector" ); |
88 | return true; |
89 | } |
90 | return false; |
91 | } |
92 | |
93 | static std::string PrintValue(llvm::Value *V, bool truncate = false) { |
94 | std::string s; |
95 | raw_string_ostream rso(s); |
96 | V->print(O&: rso); |
97 | if (truncate) |
98 | s.resize(n: s.length() - 1); |
99 | return s; |
100 | } |
101 | |
102 | /// \class Instrumenter IRDynamicChecks.cpp |
103 | /// Finds and instruments individual LLVM IR instructions |
104 | /// |
105 | /// When instrumenting LLVM IR, it is frequently desirable to first search for |
106 | /// instructions, and then later modify them. This way iterators remain |
107 | /// intact, and multiple passes can look at the same code base without |
108 | /// treading on each other's toes. |
109 | /// |
110 | /// The Instrumenter class implements this functionality. A client first |
111 | /// calls Inspect on a function, which populates a list of instructions to be |
112 | /// instrumented. Then, later, when all passes' Inspect functions have been |
113 | /// called, the client calls Instrument, which adds the desired |
114 | /// instrumentation. |
115 | /// |
116 | /// A subclass of Instrumenter must override InstrumentInstruction, which |
117 | /// is responsible for adding whatever instrumentation is necessary. |
118 | /// |
119 | /// A subclass of Instrumenter may override: |
120 | /// |
121 | /// - InspectInstruction [default: does nothing] |
122 | /// |
123 | /// - InspectBasicBlock [default: iterates through the instructions in a |
124 | /// basic block calling InspectInstruction] |
125 | /// |
126 | /// - InspectFunction [default: iterates through the basic blocks in a |
127 | /// function calling InspectBasicBlock] |
128 | class Instrumenter { |
129 | public: |
130 | /// Constructor |
131 | /// |
132 | /// \param[in] module |
133 | /// The module being instrumented. |
134 | Instrumenter(llvm::Module &module, |
135 | std::shared_ptr<UtilityFunction> checker_function) |
136 | : m_module(module), m_checker_function(checker_function) {} |
137 | |
138 | virtual ~Instrumenter() = default; |
139 | |
140 | /// Inspect a function to find instructions to instrument |
141 | /// |
142 | /// \param[in] function |
143 | /// The function to inspect. |
144 | /// |
145 | /// \return |
146 | /// True on success; false on error. |
147 | bool Inspect(llvm::Function &function) { return InspectFunction(f&: function); } |
148 | |
149 | /// Instrument all the instructions found by Inspect() |
150 | /// |
151 | /// \return |
152 | /// True on success; false on error. |
153 | bool Instrument() { |
154 | for (InstIterator ii = m_to_instrument.begin(), |
155 | last_ii = m_to_instrument.end(); |
156 | ii != last_ii; ++ii) { |
157 | if (!InstrumentInstruction(inst: *ii)) |
158 | return false; |
159 | } |
160 | |
161 | return true; |
162 | } |
163 | |
164 | protected: |
165 | /// Add instrumentation to a single instruction |
166 | /// |
167 | /// \param[in] inst |
168 | /// The instruction to be instrumented. |
169 | /// |
170 | /// \return |
171 | /// True on success; false otherwise. |
172 | virtual bool InstrumentInstruction(llvm::Instruction *inst) = 0; |
173 | |
174 | /// Register a single instruction to be instrumented |
175 | /// |
176 | /// \param[in] inst |
177 | /// The instruction to be instrumented. |
178 | void RegisterInstruction(llvm::Instruction &inst) { |
179 | m_to_instrument.push_back(x: &inst); |
180 | } |
181 | |
182 | /// Determine whether a single instruction is interesting to instrument, |
183 | /// and, if so, call RegisterInstruction |
184 | /// |
185 | /// \param[in] i |
186 | /// The instruction to be inspected. |
187 | /// |
188 | /// \return |
189 | /// False if there was an error scanning; true otherwise. |
190 | virtual bool InspectInstruction(llvm::Instruction &i) { return true; } |
191 | |
192 | /// Scan a basic block to see if any instructions are interesting |
193 | /// |
194 | /// \param[in] bb |
195 | /// The basic block to be inspected. |
196 | /// |
197 | /// \return |
198 | /// False if there was an error scanning; true otherwise. |
199 | virtual bool InspectBasicBlock(llvm::BasicBlock &bb) { |
200 | for (llvm::BasicBlock::iterator ii = bb.begin(), last_ii = bb.end(); |
201 | ii != last_ii; ++ii) { |
202 | if (!InspectInstruction(i&: *ii)) |
203 | return false; |
204 | } |
205 | |
206 | return true; |
207 | } |
208 | |
209 | /// Scan a function to see if any instructions are interesting |
210 | /// |
211 | /// \param[in] f |
212 | /// The function to be inspected. |
213 | /// |
214 | /// \return |
215 | /// False if there was an error scanning; true otherwise. |
216 | virtual bool InspectFunction(llvm::Function &f) { |
217 | for (llvm::Function::iterator bbi = f.begin(), last_bbi = f.end(); |
218 | bbi != last_bbi; ++bbi) { |
219 | if (!InspectBasicBlock(bb&: *bbi)) |
220 | return false; |
221 | } |
222 | |
223 | return true; |
224 | } |
225 | |
226 | /// Build a function pointer for a function with signature void |
227 | /// (*)(uint8_t*) with a given address |
228 | /// |
229 | /// \param[in] start_address |
230 | /// The address of the function. |
231 | /// |
232 | /// \return |
233 | /// The function pointer, for use in a CallInst. |
234 | llvm::FunctionCallee BuildPointerValidatorFunc(lldb::addr_t start_address) { |
235 | llvm::Type *param_array[1]; |
236 | |
237 | param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy()); |
238 | |
239 | ArrayRef<llvm::Type *> params(param_array, 1); |
240 | |
241 | FunctionType *fun_ty = FunctionType::get( |
242 | Result: llvm::Type::getVoidTy(C&: m_module.getContext()), Params: params, isVarArg: true); |
243 | PointerType *fun_ptr_ty = PointerType::getUnqual(C&: m_module.getContext()); |
244 | Constant *fun_addr_int = |
245 | ConstantInt::get(Ty: GetIntptrTy(), V: start_address, IsSigned: false); |
246 | return {fun_ty, ConstantExpr::getIntToPtr(C: fun_addr_int, Ty: fun_ptr_ty)}; |
247 | } |
248 | |
249 | /// Build a function pointer for a function with signature void |
250 | /// (*)(uint8_t*, uint8_t*) with a given address |
251 | /// |
252 | /// \param[in] start_address |
253 | /// The address of the function. |
254 | /// |
255 | /// \return |
256 | /// The function pointer, for use in a CallInst. |
257 | llvm::FunctionCallee BuildObjectCheckerFunc(lldb::addr_t start_address) { |
258 | llvm::Type *param_array[2]; |
259 | |
260 | param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy()); |
261 | param_array[1] = const_cast<llvm::PointerType *>(GetI8PtrTy()); |
262 | |
263 | ArrayRef<llvm::Type *> params(param_array, 2); |
264 | |
265 | FunctionType *fun_ty = FunctionType::get( |
266 | Result: llvm::Type::getVoidTy(C&: m_module.getContext()), Params: params, isVarArg: true); |
267 | PointerType *fun_ptr_ty = PointerType::getUnqual(C&: m_module.getContext()); |
268 | Constant *fun_addr_int = |
269 | ConstantInt::get(Ty: GetIntptrTy(), V: start_address, IsSigned: false); |
270 | return {fun_ty, ConstantExpr::getIntToPtr(C: fun_addr_int, Ty: fun_ptr_ty)}; |
271 | } |
272 | |
273 | PointerType *GetI8PtrTy() { |
274 | if (!m_i8ptr_ty) |
275 | m_i8ptr_ty = llvm::PointerType::getUnqual(C&: m_module.getContext()); |
276 | |
277 | return m_i8ptr_ty; |
278 | } |
279 | |
280 | IntegerType *GetIntptrTy() { |
281 | if (!m_intptr_ty) { |
282 | m_intptr_ty = llvm::Type::getIntNTy( |
283 | C&: m_module.getContext(), |
284 | N: m_module.getDataLayout().getPointerSizeInBits()); |
285 | } |
286 | |
287 | return m_intptr_ty; |
288 | } |
289 | |
290 | typedef std::vector<llvm::Instruction *> InstVector; |
291 | typedef InstVector::iterator InstIterator; |
292 | |
293 | InstVector m_to_instrument; ///< List of instructions the inspector found |
294 | llvm::Module &m_module; ///< The module which is being instrumented |
295 | std::shared_ptr<UtilityFunction> |
296 | m_checker_function; ///< The dynamic checker function for the process |
297 | |
298 | private: |
299 | PointerType *m_i8ptr_ty = nullptr; |
300 | IntegerType *m_intptr_ty = nullptr; |
301 | }; |
302 | |
303 | class ValidPointerChecker : public Instrumenter { |
304 | public: |
305 | ValidPointerChecker(llvm::Module &module, |
306 | std::shared_ptr<UtilityFunction> checker_function) |
307 | : Instrumenter(module, checker_function), |
308 | m_valid_pointer_check_func(nullptr) {} |
309 | |
310 | ~ValidPointerChecker() override = default; |
311 | |
312 | protected: |
313 | bool InstrumentInstruction(llvm::Instruction *inst) override { |
314 | Log *log = GetLog(mask: LLDBLog::Expressions); |
315 | |
316 | LLDB_LOGF(log, "Instrumenting load/store instruction: %s\n" , |
317 | PrintValue(inst).c_str()); |
318 | |
319 | if (!m_valid_pointer_check_func) |
320 | m_valid_pointer_check_func = |
321 | BuildPointerValidatorFunc(start_address: m_checker_function->StartAddress()); |
322 | |
323 | llvm::Value *dereferenced_ptr = nullptr; |
324 | |
325 | if (llvm::LoadInst *li = dyn_cast<llvm::LoadInst>(Val: inst)) |
326 | dereferenced_ptr = li->getPointerOperand(); |
327 | else if (llvm::StoreInst *si = dyn_cast<llvm::StoreInst>(Val: inst)) |
328 | dereferenced_ptr = si->getPointerOperand(); |
329 | else |
330 | return false; |
331 | |
332 | // Insert an instruction to call the helper with the result |
333 | CallInst::Create(Func: m_valid_pointer_check_func, Args: dereferenced_ptr, NameStr: "" , |
334 | InsertBefore: inst->getIterator()); |
335 | |
336 | return true; |
337 | } |
338 | |
339 | bool InspectInstruction(llvm::Instruction &i) override { |
340 | if (isa<llvm::LoadInst>(Val: &i) || isa<llvm::StoreInst>(Val: &i)) |
341 | RegisterInstruction(inst&: i); |
342 | |
343 | return true; |
344 | } |
345 | |
346 | private: |
347 | llvm::FunctionCallee m_valid_pointer_check_func; |
348 | }; |
349 | |
350 | class ObjcObjectChecker : public Instrumenter { |
351 | public: |
352 | ObjcObjectChecker(llvm::Module &module, |
353 | std::shared_ptr<UtilityFunction> checker_function) |
354 | : Instrumenter(module, checker_function), |
355 | m_objc_object_check_func(nullptr) {} |
356 | |
357 | ~ObjcObjectChecker() override = default; |
358 | |
359 | enum msgSend_type { |
360 | eMsgSend = 0, |
361 | eMsgSendSuper, |
362 | eMsgSendSuper_stret, |
363 | eMsgSend_fpret, |
364 | eMsgSend_stret |
365 | }; |
366 | |
367 | std::map<llvm::Instruction *, msgSend_type> msgSend_types; |
368 | |
369 | protected: |
370 | bool InstrumentInstruction(llvm::Instruction *inst) override { |
371 | CallInst *call_inst = dyn_cast<CallInst>(Val: inst); |
372 | |
373 | if (!call_inst) |
374 | return false; // call_inst really shouldn't be nullptr, because otherwise |
375 | // InspectInstruction wouldn't have registered it |
376 | |
377 | if (!m_objc_object_check_func) |
378 | m_objc_object_check_func = |
379 | BuildObjectCheckerFunc(start_address: m_checker_function->StartAddress()); |
380 | |
381 | // id objc_msgSend(id theReceiver, SEL theSelector, ...) |
382 | |
383 | llvm::Value *target_object; |
384 | llvm::Value *selector; |
385 | |
386 | switch (msgSend_types[inst]) { |
387 | case eMsgSend: |
388 | case eMsgSend_fpret: |
389 | // On arm64, clang uses objc_msgSend for scalar and struct return |
390 | // calls. The call instruction will record which was used. |
391 | if (call_inst->hasStructRetAttr()) { |
392 | target_object = call_inst->getArgOperand(i: 1); |
393 | selector = call_inst->getArgOperand(i: 2); |
394 | } else { |
395 | target_object = call_inst->getArgOperand(i: 0); |
396 | selector = call_inst->getArgOperand(i: 1); |
397 | } |
398 | break; |
399 | case eMsgSend_stret: |
400 | target_object = call_inst->getArgOperand(i: 1); |
401 | selector = call_inst->getArgOperand(i: 2); |
402 | break; |
403 | case eMsgSendSuper: |
404 | case eMsgSendSuper_stret: |
405 | return true; |
406 | } |
407 | |
408 | // These objects should always be valid according to Sean Calannan |
409 | assert(target_object); |
410 | assert(selector); |
411 | |
412 | // Insert an instruction to call the helper with the result |
413 | |
414 | llvm::Value *arg_array[2]; |
415 | |
416 | arg_array[0] = target_object; |
417 | arg_array[1] = selector; |
418 | |
419 | ArrayRef<llvm::Value *> args(arg_array, 2); |
420 | |
421 | CallInst::Create(Func: m_objc_object_check_func, Args: args, NameStr: "" , InsertBefore: inst->getIterator()); |
422 | |
423 | return true; |
424 | } |
425 | |
426 | static llvm::Function *GetFunction(llvm::Value *value) { |
427 | if (llvm::Function *function = llvm::dyn_cast<llvm::Function>(Val: value)) { |
428 | return function; |
429 | } |
430 | |
431 | if (llvm::ConstantExpr *const_expr = |
432 | llvm::dyn_cast<llvm::ConstantExpr>(Val: value)) { |
433 | switch (const_expr->getOpcode()) { |
434 | default: |
435 | return nullptr; |
436 | case llvm::Instruction::BitCast: |
437 | return GetFunction(value: const_expr->getOperand(i_nocapture: 0)); |
438 | } |
439 | } |
440 | |
441 | return nullptr; |
442 | } |
443 | |
444 | static llvm::Function *GetCalledFunction(llvm::CallInst *inst) { |
445 | return GetFunction(value: inst->getCalledOperand()); |
446 | } |
447 | |
448 | bool InspectInstruction(llvm::Instruction &i) override { |
449 | Log *log = GetLog(mask: LLDBLog::Expressions); |
450 | |
451 | CallInst *call_inst = dyn_cast<CallInst>(Val: &i); |
452 | |
453 | if (call_inst) { |
454 | const llvm::Function *called_function = GetCalledFunction(inst: call_inst); |
455 | |
456 | if (!called_function) |
457 | return true; |
458 | |
459 | std::string name_str = called_function->getName().str(); |
460 | const char *name_cstr = name_str.c_str(); |
461 | |
462 | LLDB_LOGF(log, "Found call to %s: %s\n" , name_cstr, |
463 | PrintValue(call_inst).c_str()); |
464 | |
465 | if (name_str.find(s: "objc_msgSend" ) == std::string::npos) |
466 | return true; |
467 | |
468 | if (!strcmp(s1: name_cstr, s2: "objc_msgSend" )) { |
469 | RegisterInstruction(inst&: i); |
470 | msgSend_types[&i] = eMsgSend; |
471 | return true; |
472 | } |
473 | |
474 | if (!strcmp(s1: name_cstr, s2: "objc_msgSend_stret" )) { |
475 | RegisterInstruction(inst&: i); |
476 | msgSend_types[&i] = eMsgSend_stret; |
477 | return true; |
478 | } |
479 | |
480 | if (!strcmp(s1: name_cstr, s2: "objc_msgSend_fpret" )) { |
481 | RegisterInstruction(inst&: i); |
482 | msgSend_types[&i] = eMsgSend_fpret; |
483 | return true; |
484 | } |
485 | |
486 | if (!strcmp(s1: name_cstr, s2: "objc_msgSendSuper" )) { |
487 | RegisterInstruction(inst&: i); |
488 | msgSend_types[&i] = eMsgSendSuper; |
489 | return true; |
490 | } |
491 | |
492 | if (!strcmp(s1: name_cstr, s2: "objc_msgSendSuper_stret" )) { |
493 | RegisterInstruction(inst&: i); |
494 | msgSend_types[&i] = eMsgSendSuper_stret; |
495 | return true; |
496 | } |
497 | |
498 | LLDB_LOGF(log, |
499 | "Function name '%s' contains 'objc_msgSend' but is not handled" , |
500 | name_str.c_str()); |
501 | |
502 | return true; |
503 | } |
504 | |
505 | return true; |
506 | } |
507 | |
508 | private: |
509 | llvm::FunctionCallee m_objc_object_check_func; |
510 | }; |
511 | |
512 | IRDynamicChecks::IRDynamicChecks( |
513 | ClangDynamicCheckerFunctions &checker_functions, const char *func_name) |
514 | : ModulePass(ID), m_func_name(func_name), |
515 | m_checker_functions(checker_functions) {} |
516 | |
517 | IRDynamicChecks::~IRDynamicChecks() = default; |
518 | |
519 | bool IRDynamicChecks::runOnModule(llvm::Module &M) { |
520 | Log *log = GetLog(mask: LLDBLog::Expressions); |
521 | |
522 | llvm::Function *function = M.getFunction(Name: StringRef(m_func_name)); |
523 | |
524 | if (!function) { |
525 | LLDB_LOGF(log, "Couldn't find %s() in the module" , m_func_name.c_str()); |
526 | |
527 | return false; |
528 | } |
529 | |
530 | if (m_checker_functions.m_valid_pointer_check) { |
531 | ValidPointerChecker vpc(M, m_checker_functions.m_valid_pointer_check); |
532 | |
533 | if (!vpc.Inspect(function&: *function)) |
534 | return false; |
535 | |
536 | if (!vpc.Instrument()) |
537 | return false; |
538 | } |
539 | |
540 | if (m_checker_functions.m_objc_object_check) { |
541 | ObjcObjectChecker ooc(M, m_checker_functions.m_objc_object_check); |
542 | |
543 | if (!ooc.Inspect(function&: *function)) |
544 | return false; |
545 | |
546 | if (!ooc.Instrument()) |
547 | return false; |
548 | } |
549 | |
550 | if (log && log->GetVerbose()) { |
551 | std::string s; |
552 | raw_string_ostream oss(s); |
553 | |
554 | M.print(OS&: oss, AAW: nullptr); |
555 | |
556 | LLDB_LOGF(log, "Module after dynamic checks: \n%s" , s.c_str()); |
557 | } |
558 | |
559 | return true; |
560 | } |
561 | |
562 | void IRDynamicChecks::assignPassManager(PMStack &PMS, PassManagerType T) {} |
563 | |
564 | PassManagerType IRDynamicChecks::getPotentialPassManagerType() const { |
565 | return PMT_ModulePassManager; |
566 | } |
567 | |