//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This family of functions performs manipulations on Modules.
//
//===----------------------------------------------------------------------===//
12
13#include "llvm/Transforms/Utils/ModuleUtils.h"
14#include "llvm/Analysis/VectorUtils.h"
15#include "llvm/ADT/SmallString.h"
16#include "llvm/IR/DerivedTypes.h"
17#include "llvm/IR/Function.h"
18#include "llvm/IR/IRBuilder.h"
19#include "llvm/IR/MDBuilder.h"
20#include "llvm/IR/Module.h"
21#include "llvm/Support/raw_ostream.h"
22#include "llvm/Support/xxhash.h"
23
24using namespace llvm;
25
26#define DEBUG_TYPE "moduleutils"
27
28static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
29 int Priority, Constant *Data) {
30 IRBuilder<> IRB(M.getContext());
31 FunctionType *FnTy = FunctionType::get(Result: IRB.getVoidTy(), isVarArg: false);
32
33 // Get the current set of static global constructors and add the new ctor
34 // to the list.
35 SmallVector<Constant *, 16> CurrentCtors;
36 StructType *EltTy;
37 if (GlobalVariable *GVCtor = M.getNamedGlobal(Name: ArrayName)) {
38 EltTy = cast<StructType>(Val: GVCtor->getValueType()->getArrayElementType());
39 if (Constant *Init = GVCtor->getInitializer()) {
40 unsigned n = Init->getNumOperands();
41 CurrentCtors.reserve(N: n + 1);
42 for (unsigned i = 0; i != n; ++i)
43 CurrentCtors.push_back(Elt: cast<Constant>(Val: Init->getOperand(i)));
44 }
45 GVCtor->eraseFromParent();
46 } else {
47 EltTy = StructType::get(elt1: IRB.getInt32Ty(),
48 elts: PointerType::get(ElementType: FnTy, AddressSpace: F->getAddressSpace()),
49 elts: IRB.getPtrTy());
50 }
51
52 // Build a 3 field global_ctor entry. We don't take a comdat key.
53 Constant *CSVals[3];
54 CSVals[0] = IRB.getInt32(C: Priority);
55 CSVals[1] = F;
56 CSVals[2] = Data ? ConstantExpr::getPointerCast(C: Data, Ty: IRB.getPtrTy())
57 : Constant::getNullValue(Ty: IRB.getPtrTy());
58 Constant *RuntimeCtorInit =
59 ConstantStruct::get(T: EltTy, V: ArrayRef(CSVals, EltTy->getNumElements()));
60
61 CurrentCtors.push_back(Elt: RuntimeCtorInit);
62
63 // Create a new initializer.
64 ArrayType *AT = ArrayType::get(ElementType: EltTy, NumElements: CurrentCtors.size());
65 Constant *NewInit = ConstantArray::get(T: AT, V: CurrentCtors);
66
67 // Create the new global variable and replace all uses of
68 // the old global variable with the new one.
69 (void)new GlobalVariable(M, NewInit->getType(), false,
70 GlobalValue::AppendingLinkage, NewInit, ArrayName);
71}
72
73void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
74 appendToGlobalArray(ArrayName: "llvm.global_ctors", M, F, Priority, Data);
75}
76
77void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
78 appendToGlobalArray(ArrayName: "llvm.global_dtors", M, F, Priority, Data);
79}
80
81static void collectUsedGlobals(GlobalVariable *GV,
82 SmallSetVector<Constant *, 16> &Init) {
83 if (!GV || !GV->hasInitializer())
84 return;
85
86 auto *CA = cast<ConstantArray>(Val: GV->getInitializer());
87 for (Use &Op : CA->operands())
88 Init.insert(X: cast<Constant>(Val&: Op));
89}
90
91static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
92 GlobalVariable *GV = M.getGlobalVariable(Name);
93
94 SmallSetVector<Constant *, 16> Init;
95 collectUsedGlobals(GV, Init);
96 if (GV)
97 GV->eraseFromParent();
98
99 Type *ArrayEltTy = llvm::PointerType::getUnqual(C&: M.getContext());
100 for (auto *V : Values)
101 Init.insert(X: ConstantExpr::getPointerBitCastOrAddrSpaceCast(C: V, Ty: ArrayEltTy));
102
103 if (Init.empty())
104 return;
105
106 ArrayType *ATy = ArrayType::get(ElementType: ArrayEltTy, NumElements: Init.size());
107 GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
108 ConstantArray::get(T: ATy, V: Init.getArrayRef()),
109 Name);
110 GV->setSection("llvm.metadata");
111}
112
113void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
114 appendToUsedList(M, Name: "llvm.used", Values);
115}
116
117void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
118 appendToUsedList(M, Name: "llvm.compiler.used", Values);
119}
120
121static void removeFromUsedList(Module &M, StringRef Name,
122 function_ref<bool(Constant *)> ShouldRemove) {
123 GlobalVariable *GV = M.getNamedGlobal(Name);
124 if (!GV)
125 return;
126
127 SmallSetVector<Constant *, 16> Init;
128 collectUsedGlobals(GV, Init);
129
130 Type *ArrayEltTy = cast<ArrayType>(Val: GV->getValueType())->getElementType();
131
132 SmallVector<Constant *, 16> NewInit;
133 for (Constant *MaybeRemoved : Init) {
134 if (!ShouldRemove(MaybeRemoved->stripPointerCasts()))
135 NewInit.push_back(Elt: MaybeRemoved);
136 }
137
138 if (!NewInit.empty()) {
139 ArrayType *ATy = ArrayType::get(ElementType: ArrayEltTy, NumElements: NewInit.size());
140 GlobalVariable *NewGV =
141 new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
142 ConstantArray::get(T: ATy, V: NewInit), "", GV,
143 GV->getThreadLocalMode(), GV->getAddressSpace());
144 NewGV->setSection(GV->getSection());
145 NewGV->takeName(V: GV);
146 }
147
148 GV->eraseFromParent();
149}
150
151void llvm::removeFromUsedLists(Module &M,
152 function_ref<bool(Constant *)> ShouldRemove) {
153 removeFromUsedList(M, Name: "llvm.used", ShouldRemove);
154 removeFromUsedList(M, Name: "llvm.compiler.used", ShouldRemove);
155}
156
157void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
158 if (!M.getModuleFlag(Key: "kcfi"))
159 return;
160 // Matches CodeGenModule::CreateKCFITypeId in Clang.
161 LLVMContext &Ctx = M.getContext();
162 MDBuilder MDB(Ctx);
163 F.setMetadata(
164 KindID: LLVMContext::MD_kcfi_type,
165 Node: MDNode::get(Context&: Ctx, MDs: MDB.createConstant(C: ConstantInt::get(
166 Ty: Type::getInt32Ty(C&: Ctx),
167 V: static_cast<uint32_t>(xxHash64(Data: MangledType))))));
168 // If the module was compiled with -fpatchable-function-entry, ensure
169 // we use the same patchable-function-prefix.
170 if (auto *MD = mdconst::extract_or_null<ConstantInt>(
171 MD: M.getModuleFlag(Key: "kcfi-offset"))) {
172 if (unsigned Offset = MD->getZExtValue())
173 F.addFnAttr(Kind: "patchable-function-prefix", Val: std::to_string(val: Offset));
174 }
175}
176
177FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
178 ArrayRef<Type *> InitArgTypes,
179 bool Weak) {
180 assert(!InitName.empty() && "Expected init function name");
181 auto *VoidTy = Type::getVoidTy(C&: M.getContext());
182 auto *FnTy = FunctionType::get(Result: VoidTy, Params: InitArgTypes, isVarArg: false);
183 auto FnCallee = M.getOrInsertFunction(Name: InitName, T: FnTy);
184 auto *Fn = cast<Function>(Val: FnCallee.getCallee());
185 if (Weak && Fn->isDeclaration())
186 Fn->setLinkage(Function::ExternalWeakLinkage);
187 return FnCallee;
188}
189
190Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
191 Function *Ctor = Function::createWithDefaultAttr(
192 Ty: FunctionType::get(Result: Type::getVoidTy(C&: M.getContext()), isVarArg: false),
193 Linkage: GlobalValue::InternalLinkage, AddrSpace: M.getDataLayout().getProgramAddressSpace(),
194 N: CtorName, M: &M);
195 Ctor->addFnAttr(Attribute::NoUnwind);
196 setKCFIType(M, F&: *Ctor, MangledType: "_ZTSFvvE"); // void (*)(void)
197 BasicBlock *CtorBB = BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: Ctor);
198 ReturnInst::Create(C&: M.getContext(), InsertAtEnd: CtorBB);
199 // Ensure Ctor cannot be discarded, even if in a comdat.
200 appendToUsed(M, Values: {Ctor});
201 return Ctor;
202}
203
204std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
205 Module &M, StringRef CtorName, StringRef InitName,
206 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
207 StringRef VersionCheckName, bool Weak) {
208 assert(!InitName.empty() && "Expected init function name");
209 assert(InitArgs.size() == InitArgTypes.size() &&
210 "Sanitizer's init function expects different number of arguments");
211 FunctionCallee InitFunction =
212 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
213 Function *Ctor = createSanitizerCtor(M, CtorName);
214 IRBuilder<> IRB(M.getContext());
215
216 BasicBlock *RetBB = &Ctor->getEntryBlock();
217 if (Weak) {
218 RetBB->setName("ret");
219 auto *EntryBB = BasicBlock::Create(Context&: M.getContext(), Name: "entry", Parent: Ctor, InsertBefore: RetBB);
220 auto *CallInitBB =
221 BasicBlock::Create(Context&: M.getContext(), Name: "callfunc", Parent: Ctor, InsertBefore: RetBB);
222 auto *InitFn = cast<Function>(Val: InitFunction.getCallee());
223 auto *InitFnPtr =
224 PointerType::get(ElementType: InitFn->getType(), AddressSpace: InitFn->getAddressSpace());
225 IRB.SetInsertPoint(EntryBB);
226 Value *InitNotNull =
227 IRB.CreateICmpNE(LHS: InitFn, RHS: ConstantPointerNull::get(T: InitFnPtr));
228 IRB.CreateCondBr(Cond: InitNotNull, True: CallInitBB, False: RetBB);
229 IRB.SetInsertPoint(CallInitBB);
230 } else {
231 IRB.SetInsertPoint(RetBB->getTerminator());
232 }
233
234 IRB.CreateCall(Callee: InitFunction, Args: InitArgs);
235 if (!VersionCheckName.empty()) {
236 FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
237 Name: VersionCheckName, T: FunctionType::get(Result: IRB.getVoidTy(), Params: {}, isVarArg: false),
238 AttributeList: AttributeList());
239 IRB.CreateCall(Callee: VersionCheckFunction, Args: {});
240 }
241
242 if (Weak)
243 IRB.CreateBr(Dest: RetBB);
244
245 return std::make_pair(x&: Ctor, y&: InitFunction);
246}
247
248std::pair<Function *, FunctionCallee>
249llvm::getOrCreateSanitizerCtorAndInitFunctions(
250 Module &M, StringRef CtorName, StringRef InitName,
251 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
252 function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
253 StringRef VersionCheckName, bool Weak) {
254 assert(!CtorName.empty() && "Expected ctor function name");
255
256 if (Function *Ctor = M.getFunction(Name: CtorName))
257 // FIXME: Sink this logic into the module, similar to the handling of
258 // globals. This will make moving to a concurrent model much easier.
259 if (Ctor->arg_empty() ||
260 Ctor->getReturnType() == Type::getVoidTy(C&: M.getContext()))
261 return {Ctor,
262 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
263
264 Function *Ctor;
265 FunctionCallee InitFunction;
266 std::tie(args&: Ctor, args&: InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
267 M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak);
268 FunctionsCreatedCallback(Ctor, InitFunction);
269 return std::make_pair(x&: Ctor, y&: InitFunction);
270}
271
272void llvm::filterDeadComdatFunctions(
273 SmallVectorImpl<Function *> &DeadComdatFunctions) {
274 SmallPtrSet<Function *, 32> MaybeDeadFunctions;
275 SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
276 for (Function *F : DeadComdatFunctions) {
277 MaybeDeadFunctions.insert(Ptr: F);
278 if (Comdat *C = F->getComdat())
279 MaybeDeadComdats.insert(Ptr: C);
280 }
281
282 // Find comdats for which all users are dead now.
283 SmallPtrSet<Comdat *, 32> DeadComdats;
284 for (Comdat *C : MaybeDeadComdats) {
285 auto IsUserDead = [&](GlobalObject *GO) {
286 auto *F = dyn_cast<Function>(Val: GO);
287 return F && MaybeDeadFunctions.contains(Ptr: F);
288 };
289 if (all_of(Range: C->getUsers(), P: IsUserDead))
290 DeadComdats.insert(Ptr: C);
291 }
292
293 // Only keep functions which have no comdat or a dead comdat.
294 erase_if(C&: DeadComdatFunctions, P: [&](Function *F) {
295 Comdat *C = F->getComdat();
296 return C && !DeadComdats.contains(Ptr: C);
297 });
298}
299
300std::string llvm::getUniqueModuleId(Module *M) {
301 MD5 Md5;
302 bool ExportsSymbols = false;
303 auto AddGlobal = [&](GlobalValue &GV) {
304 if (GV.isDeclaration() || GV.getName().starts_with(Prefix: "llvm.") ||
305 !GV.hasExternalLinkage() || GV.hasComdat())
306 return;
307 ExportsSymbols = true;
308 Md5.update(Str: GV.getName());
309 Md5.update(Data: ArrayRef<uint8_t>{0});
310 };
311
312 for (auto &F : *M)
313 AddGlobal(F);
314 for (auto &GV : M->globals())
315 AddGlobal(GV);
316 for (auto &GA : M->aliases())
317 AddGlobal(GA);
318 for (auto &IF : M->ifuncs())
319 AddGlobal(IF);
320
321 if (!ExportsSymbols)
322 return "";
323
324 MD5::MD5Result R;
325 Md5.final(Result&: R);
326
327 SmallString<32> Str;
328 MD5::stringifyResult(Result&: R, Str);
329 return ("." + Str).str();
330}
331
332void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
333 StringRef SectionName, Align Alignment) {
334 // Embed the memory buffer into the module.
335 Constant *ModuleConstant = ConstantDataArray::get(
336 Context&: M.getContext(), Elts: ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
337 GlobalVariable *GV = new GlobalVariable(
338 M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
339 ModuleConstant, "llvm.embedded.object");
340 GV->setSection(SectionName);
341 GV->setAlignment(Alignment);
342
343 LLVMContext &Ctx = M.getContext();
344 NamedMDNode *MD = M.getOrInsertNamedMetadata(Name: "llvm.embedded.objects");
345 Metadata *MDVals[] = {ConstantAsMetadata::get(C: GV),
346 MDString::get(Context&: Ctx, Str: SectionName)};
347
348 MD->addOperand(M: llvm::MDNode::get(Context&: Ctx, MDs: MDVals));
349 GV->setMetadata(KindID: LLVMContext::MD_exclude, Node: llvm::MDNode::get(Context&: Ctx, MDs: {}));
350
351 appendToCompilerUsed(M, Values: GV);
352}
353
354bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
355 Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
356 SmallVector<GlobalIFunc *, 32> AllIFuncs;
357 ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
358 if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
359 for (GlobalIFunc &GI : M.ifuncs())
360 AllIFuncs.push_back(Elt: &GI);
361 IFuncsToLower = AllIFuncs;
362 }
363
364 bool UnhandledUsers = false;
365 LLVMContext &Ctx = M.getContext();
366 const DataLayout &DL = M.getDataLayout();
367
368 PointerType *TableEntryTy =
369 PointerType::get(C&: Ctx, AddressSpace: DL.getProgramAddressSpace());
370
371 ArrayType *FuncPtrTableTy =
372 ArrayType::get(ElementType: TableEntryTy, NumElements: IFuncsToLower.size());
373
374 Align PtrAlign = DL.getABITypeAlign(Ty: TableEntryTy);
375
376 // Create a global table of function pointers we'll initialize in a global
377 // constructor.
378 auto *FuncPtrTable = new GlobalVariable(
379 M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
380 PoisonValue::get(T: FuncPtrTableTy), "", nullptr,
381 GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
382 FuncPtrTable->setAlignment(PtrAlign);
383
384 // Create a function to initialize the function pointer table.
385 Function *NewCtor = Function::Create(
386 Ty: FunctionType::get(Result: Type::getVoidTy(C&: Ctx), isVarArg: false), Linkage: Function::InternalLinkage,
387 AddrSpace: DL.getProgramAddressSpace(), N: "", M: &M);
388
389 BasicBlock *BB = BasicBlock::Create(Context&: Ctx, Name: "", Parent: NewCtor);
390 IRBuilder<> InitBuilder(BB);
391
392 size_t TableIndex = 0;
393 for (GlobalIFunc *GI : IFuncsToLower) {
394 Function *ResolvedFunction = GI->getResolverFunction();
395
396 // We don't know what to pass to a resolver function taking arguments
397 //
398 // FIXME: Is this even valid? clang and gcc don't complain but this
399 // probably should be invalid IR. We could just pass through undef.
400 if (!std::empty(cont: ResolvedFunction->getFunctionType()->params())) {
401 LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
402 << ResolvedFunction->getName() << " with parameters\n");
403 UnhandledUsers = true;
404 continue;
405 }
406
407 // Initialize the function pointer table.
408 CallInst *ResolvedFunc = InitBuilder.CreateCall(Callee: ResolvedFunction);
409 Value *Casted = InitBuilder.CreatePointerCast(V: ResolvedFunc, DestTy: TableEntryTy);
410 Constant *GEP = cast<Constant>(Val: InitBuilder.CreateConstInBoundsGEP2_32(
411 Ty: FuncPtrTableTy, Ptr: FuncPtrTable, Idx0: 0, Idx1: TableIndex++));
412 InitBuilder.CreateAlignedStore(Val: Casted, Ptr: GEP, Align: PtrAlign);
413
414 // Update all users to load a pointer from the global table.
415 for (User *User : make_early_inc_range(Range: GI->users())) {
416 Instruction *UserInst = dyn_cast<Instruction>(Val: User);
417 if (!UserInst) {
418 // TODO: Should handle constantexpr casts in user instructions. Probably
419 // can't do much about constant initializers.
420 UnhandledUsers = true;
421 continue;
422 }
423
424 IRBuilder<> UseBuilder(UserInst);
425 LoadInst *ResolvedTarget =
426 UseBuilder.CreateAlignedLoad(Ty: TableEntryTy, Ptr: GEP, Align: PtrAlign);
427 Value *ResolvedCast =
428 UseBuilder.CreatePointerCast(V: ResolvedTarget, DestTy: GI->getType());
429 UserInst->replaceUsesOfWith(From: GI, To: ResolvedCast);
430 }
431
432 // If we handled all users, erase the ifunc.
433 if (GI->use_empty())
434 GI->eraseFromParent();
435 }
436
437 InitBuilder.CreateRetVoid();
438
439 PointerType *ConstantDataTy = PointerType::get(C&: Ctx, AddressSpace: 0);
440
441 // TODO: Is this the right priority? Probably should be before any other
442 // constructors?
443 const int Priority = 10;
444 appendToGlobalCtors(M, F: NewCtor, Priority,
445 Data: ConstantPointerNull::get(T: ConstantDataTy));
446 return UnhandledUsers;
447}
448

// Source: llvm/lib/Transforms/Utils/ModuleUtils.cpp