1 | //===- ModuleSummaryAnalysis.cpp - Module summary index builder -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass builds a ModuleSummaryIndex object for the module, to be written |
10 | // to bitcode or LLVM assembly. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Analysis/ModuleSummaryAnalysis.h" |
15 | #include "llvm/ADT/ArrayRef.h" |
16 | #include "llvm/ADT/DenseSet.h" |
17 | #include "llvm/ADT/MapVector.h" |
18 | #include "llvm/ADT/STLExtras.h" |
19 | #include "llvm/ADT/SetVector.h" |
20 | #include "llvm/ADT/SmallPtrSet.h" |
21 | #include "llvm/ADT/SmallVector.h" |
22 | #include "llvm/ADT/StringRef.h" |
23 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
24 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
25 | #include "llvm/Analysis/ConstantFolding.h" |
26 | #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" |
27 | #include "llvm/Analysis/LoopInfo.h" |
28 | #include "llvm/Analysis/MemoryProfileInfo.h" |
29 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
30 | #include "llvm/Analysis/StackSafetyAnalysis.h" |
31 | #include "llvm/Analysis/TypeMetadataUtils.h" |
32 | #include "llvm/IR/Attributes.h" |
33 | #include "llvm/IR/BasicBlock.h" |
34 | #include "llvm/IR/Constant.h" |
35 | #include "llvm/IR/Constants.h" |
36 | #include "llvm/IR/Dominators.h" |
37 | #include "llvm/IR/Function.h" |
38 | #include "llvm/IR/GlobalAlias.h" |
39 | #include "llvm/IR/GlobalValue.h" |
40 | #include "llvm/IR/GlobalVariable.h" |
41 | #include "llvm/IR/Instructions.h" |
42 | #include "llvm/IR/IntrinsicInst.h" |
43 | #include "llvm/IR/Metadata.h" |
44 | #include "llvm/IR/Module.h" |
45 | #include "llvm/IR/ModuleSummaryIndex.h" |
46 | #include "llvm/IR/Use.h" |
47 | #include "llvm/IR/User.h" |
48 | #include "llvm/InitializePasses.h" |
49 | #include "llvm/Object/ModuleSymbolTable.h" |
50 | #include "llvm/Object/SymbolicFile.h" |
51 | #include "llvm/Pass.h" |
52 | #include "llvm/Support/Casting.h" |
53 | #include "llvm/Support/CommandLine.h" |
54 | #include "llvm/Support/FileSystem.h" |
55 | #include <algorithm> |
56 | #include <cassert> |
57 | #include <cstdint> |
58 | #include <vector> |
59 | |
60 | using namespace llvm; |
61 | using namespace llvm::memprof; |
62 | |
63 | #define DEBUG_TYPE "module-summary-analysis" |
64 | |
65 | // Option to force edges cold which will block importing when the |
66 | // -import-cold-multiplier is set to 0. Useful for debugging. |
67 | namespace llvm { |
68 | FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold = |
69 | FunctionSummary::FSHT_None; |
70 | } // namespace llvm |
71 | |
72 | static cl::opt<FunctionSummary::ForceSummaryHotnessType, true> FSEC( |
73 | "force-summary-edges-cold" , cl::Hidden, cl::location(L&: ForceSummaryEdgesCold), |
74 | cl::desc("Force all edges in the function summary to cold" ), |
75 | cl::values(clEnumValN(FunctionSummary::FSHT_None, "none" , "None." ), |
76 | clEnumValN(FunctionSummary::FSHT_AllNonCritical, |
77 | "all-non-critical" , "All non-critical edges." ), |
78 | clEnumValN(FunctionSummary::FSHT_All, "all" , "All edges." ))); |
79 | |
80 | static cl::opt<std::string> ModuleSummaryDotFile( |
81 | "module-summary-dot-file" , cl::Hidden, cl::value_desc("filename" ), |
82 | cl::desc("File to emit dot graph of new summary into" )); |
83 | |
84 | extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize; |
85 | |
86 | extern cl::opt<unsigned> MaxNumVTableAnnotations; |
87 | |
88 | // Walk through the operands of a given User via worklist iteration and populate |
89 | // the set of GlobalValue references encountered. Invoked either on an |
90 | // Instruction or a GlobalVariable (which walks its initializer). |
91 | // Return true if any of the operands contains blockaddress. This is important |
92 | // to know when computing summary for global var, because if global variable |
93 | // references basic block address we can't import it separately from function |
94 | // containing that basic block. For simplicity we currently don't import such |
95 | // global vars at all. When importing function we aren't interested if any |
96 | // instruction in it takes an address of any basic block, because instruction |
97 | // can only take an address of basic block located in the same function. |
98 | static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser, |
99 | SetVector<ValueInfo, std::vector<ValueInfo>> &RefEdges, |
100 | SmallPtrSet<const User *, 8> &Visited) { |
101 | bool HasBlockAddress = false; |
102 | SmallVector<const User *, 32> Worklist; |
103 | if (Visited.insert(Ptr: CurUser).second) |
104 | Worklist.push_back(Elt: CurUser); |
105 | |
106 | while (!Worklist.empty()) { |
107 | const User *U = Worklist.pop_back_val(); |
108 | const auto *CB = dyn_cast<CallBase>(Val: U); |
109 | |
110 | for (const auto &OI : U->operands()) { |
111 | const User *Operand = dyn_cast<User>(Val: OI); |
112 | if (!Operand) |
113 | continue; |
114 | if (isa<BlockAddress>(Val: Operand)) { |
115 | HasBlockAddress = true; |
116 | continue; |
117 | } |
118 | if (auto *GV = dyn_cast<GlobalValue>(Val: Operand)) { |
119 | // We have a reference to a global value. This should be added to |
120 | // the reference set unless it is a callee. Callees are handled |
121 | // specially by WriteFunction and are added to a separate list. |
122 | if (!(CB && CB->isCallee(U: &OI))) |
123 | RefEdges.insert(X: Index.getOrInsertValueInfo(GV)); |
124 | continue; |
125 | } |
126 | if (Visited.insert(Ptr: Operand).second) |
127 | Worklist.push_back(Elt: Operand); |
128 | } |
129 | } |
130 | |
131 | const Instruction *I = dyn_cast<Instruction>(Val: CurUser); |
132 | if (I) { |
133 | uint32_t ActualNumValueData = 0; |
134 | uint64_t TotalCount = 0; |
135 | // MaxNumVTableAnnotations is the maximum number of vtables annotated on |
136 | // the instruction. |
137 | auto ValueDataArray = |
138 | getValueProfDataFromInst(Inst: *I, ValueKind: IPVK_VTableTarget, MaxNumValueData: MaxNumVTableAnnotations, |
139 | ActualNumValueData, TotalC&: TotalCount); |
140 | |
141 | if (ValueDataArray.get()) { |
142 | for (uint32_t j = 0; j < ActualNumValueData; j++) { |
143 | RefEdges.insert(X: Index.getOrInsertValueInfo(/* VTableGUID = */ |
144 | GUID: ValueDataArray[j].Value)); |
145 | } |
146 | } |
147 | } |
148 | return HasBlockAddress; |
149 | } |
150 | |
151 | static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount, |
152 | ProfileSummaryInfo *PSI) { |
153 | if (!PSI) |
154 | return CalleeInfo::HotnessType::Unknown; |
155 | if (PSI->isHotCount(C: ProfileCount)) |
156 | return CalleeInfo::HotnessType::Hot; |
157 | if (PSI->isColdCount(C: ProfileCount)) |
158 | return CalleeInfo::HotnessType::Cold; |
159 | return CalleeInfo::HotnessType::None; |
160 | } |
161 | |
162 | static bool isNonRenamableLocal(const GlobalValue &GV) { |
163 | return GV.hasSection() && GV.hasLocalLinkage(); |
164 | } |
165 | |
166 | /// Determine whether this call has all constant integer arguments (excluding |
167 | /// "this") and summarize it to VCalls or ConstVCalls as appropriate. |
168 | static void addVCallToSet( |
169 | DevirtCallSite Call, GlobalValue::GUID Guid, |
170 | SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>> |
171 | &VCalls, |
172 | SetVector<FunctionSummary::ConstVCall, |
173 | std::vector<FunctionSummary::ConstVCall>> &ConstVCalls) { |
174 | std::vector<uint64_t> Args; |
175 | // Start from the second argument to skip the "this" pointer. |
176 | for (auto &Arg : drop_begin(RangeOrContainer: Call.CB.args())) { |
177 | auto *CI = dyn_cast<ConstantInt>(Val&: Arg); |
178 | if (!CI || CI->getBitWidth() > 64) { |
179 | VCalls.insert(X: {.GUID: Guid, .Offset: Call.Offset}); |
180 | return; |
181 | } |
182 | Args.push_back(x: CI->getZExtValue()); |
183 | } |
184 | ConstVCalls.insert(X: {.VFunc: {.GUID: Guid, .Offset: Call.Offset}, .Args: std::move(Args)}); |
185 | } |
186 | |
187 | /// If this intrinsic call requires that we add information to the function |
188 | /// summary, do so via the non-constant reference arguments. |
189 | static void addIntrinsicToSummary( |
190 | const CallInst *CI, |
191 | SetVector<GlobalValue::GUID, std::vector<GlobalValue::GUID>> &TypeTests, |
192 | SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>> |
193 | &TypeTestAssumeVCalls, |
194 | SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>> |
195 | &TypeCheckedLoadVCalls, |
196 | SetVector<FunctionSummary::ConstVCall, |
197 | std::vector<FunctionSummary::ConstVCall>> |
198 | &TypeTestAssumeConstVCalls, |
199 | SetVector<FunctionSummary::ConstVCall, |
200 | std::vector<FunctionSummary::ConstVCall>> |
201 | &TypeCheckedLoadConstVCalls, |
202 | DominatorTree &DT) { |
203 | switch (CI->getCalledFunction()->getIntrinsicID()) { |
204 | case Intrinsic::type_test: |
205 | case Intrinsic::public_type_test: { |
206 | auto *TypeMDVal = cast<MetadataAsValue>(Val: CI->getArgOperand(i: 1)); |
207 | auto *TypeId = dyn_cast<MDString>(Val: TypeMDVal->getMetadata()); |
208 | if (!TypeId) |
209 | break; |
210 | GlobalValue::GUID Guid = GlobalValue::getGUID(GlobalName: TypeId->getString()); |
211 | |
212 | // Produce a summary from type.test intrinsics. We only summarize type.test |
213 | // intrinsics that are used other than by an llvm.assume intrinsic. |
214 | // Intrinsics that are assumed are relevant only to the devirtualization |
215 | // pass, not the type test lowering pass. |
216 | bool HasNonAssumeUses = llvm::any_of(Range: CI->uses(), P: [](const Use &CIU) { |
217 | return !isa<AssumeInst>(Val: CIU.getUser()); |
218 | }); |
219 | if (HasNonAssumeUses) |
220 | TypeTests.insert(X: Guid); |
221 | |
222 | SmallVector<DevirtCallSite, 4> DevirtCalls; |
223 | SmallVector<CallInst *, 4> Assumes; |
224 | findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); |
225 | for (auto &Call : DevirtCalls) |
226 | addVCallToSet(Call, Guid, VCalls&: TypeTestAssumeVCalls, |
227 | ConstVCalls&: TypeTestAssumeConstVCalls); |
228 | |
229 | break; |
230 | } |
231 | |
232 | case Intrinsic::type_checked_load_relative: |
233 | case Intrinsic::type_checked_load: { |
234 | auto *TypeMDVal = cast<MetadataAsValue>(Val: CI->getArgOperand(i: 2)); |
235 | auto *TypeId = dyn_cast<MDString>(Val: TypeMDVal->getMetadata()); |
236 | if (!TypeId) |
237 | break; |
238 | GlobalValue::GUID Guid = GlobalValue::getGUID(GlobalName: TypeId->getString()); |
239 | |
240 | SmallVector<DevirtCallSite, 4> DevirtCalls; |
241 | SmallVector<Instruction *, 4> LoadedPtrs; |
242 | SmallVector<Instruction *, 4> Preds; |
243 | bool HasNonCallUses = false; |
244 | findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds, |
245 | HasNonCallUses, CI, DT); |
246 | // Any non-call uses of the result of llvm.type.checked.load will |
247 | // prevent us from optimizing away the llvm.type.test. |
248 | if (HasNonCallUses) |
249 | TypeTests.insert(X: Guid); |
250 | for (auto &Call : DevirtCalls) |
251 | addVCallToSet(Call, Guid, VCalls&: TypeCheckedLoadVCalls, |
252 | ConstVCalls&: TypeCheckedLoadConstVCalls); |
253 | |
254 | break; |
255 | } |
256 | default: |
257 | break; |
258 | } |
259 | } |
260 | |
261 | static bool isNonVolatileLoad(const Instruction *I) { |
262 | if (const auto *LI = dyn_cast<LoadInst>(Val: I)) |
263 | return !LI->isVolatile(); |
264 | |
265 | return false; |
266 | } |
267 | |
268 | static bool isNonVolatileStore(const Instruction *I) { |
269 | if (const auto *SI = dyn_cast<StoreInst>(Val: I)) |
270 | return !SI->isVolatile(); |
271 | |
272 | return false; |
273 | } |
274 | |
275 | // Returns true if the function definition must be unreachable. |
276 | // |
277 | // Note if this helper function returns true, `F` is guaranteed |
278 | // to be unreachable; if it returns false, `F` might still |
279 | // be unreachable but not covered by this helper function. |
280 | static bool mustBeUnreachableFunction(const Function &F) { |
281 | // A function must be unreachable if its entry block ends with an |
282 | // 'unreachable'. |
283 | assert(!F.isDeclaration()); |
284 | return isa<UnreachableInst>(Val: F.getEntryBlock().getTerminator()); |
285 | } |
286 | |
287 | static void computeFunctionSummary( |
288 | ModuleSummaryIndex &Index, const Module &M, const Function &F, |
289 | BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, DominatorTree &DT, |
290 | bool HasLocalsInUsedOrAsm, DenseSet<GlobalValue::GUID> &CantBePromoted, |
291 | bool IsThinLTO, |
292 | std::function<const StackSafetyInfo *(const Function &F)> GetSSICallback) { |
293 | // Summary not currently supported for anonymous functions, they should |
294 | // have been named. |
295 | assert(F.hasName()); |
296 | |
297 | unsigned NumInsts = 0; |
298 | // Map from callee ValueId to profile count. Used to accumulate profile |
299 | // counts for all static calls to a given callee. |
300 | MapVector<ValueInfo, CalleeInfo, DenseMap<ValueInfo, unsigned>, |
301 | std::vector<std::pair<ValueInfo, CalleeInfo>>> |
302 | CallGraphEdges; |
303 | SetVector<ValueInfo, std::vector<ValueInfo>> RefEdges, LoadRefEdges, |
304 | StoreRefEdges; |
305 | SetVector<GlobalValue::GUID, std::vector<GlobalValue::GUID>> TypeTests; |
306 | SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>> |
307 | TypeTestAssumeVCalls, TypeCheckedLoadVCalls; |
308 | SetVector<FunctionSummary::ConstVCall, |
309 | std::vector<FunctionSummary::ConstVCall>> |
310 | TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls; |
311 | ICallPromotionAnalysis ICallAnalysis; |
312 | SmallPtrSet<const User *, 8> Visited; |
313 | |
314 | // Add personality function, prefix data and prologue data to function's ref |
315 | // list. |
316 | findRefEdges(Index, CurUser: &F, RefEdges, Visited); |
317 | std::vector<const Instruction *> NonVolatileLoads; |
318 | std::vector<const Instruction *> NonVolatileStores; |
319 | |
320 | std::vector<CallsiteInfo> Callsites; |
321 | std::vector<AllocInfo> Allocs; |
322 | |
323 | #ifndef NDEBUG |
324 | DenseSet<const CallBase *> CallsThatMayHaveMemprofSummary; |
325 | #endif |
326 | |
327 | bool HasInlineAsmMaybeReferencingInternal = false; |
328 | bool HasIndirBranchToBlockAddress = false; |
329 | bool HasIFuncCall = false; |
330 | bool HasUnknownCall = false; |
331 | bool MayThrow = false; |
332 | for (const BasicBlock &BB : F) { |
333 | // We don't allow inlining of function with indirect branch to blockaddress. |
334 | // If the blockaddress escapes the function, e.g., via a global variable, |
335 | // inlining may lead to an invalid cross-function reference. So we shouldn't |
336 | // import such function either. |
337 | if (BB.hasAddressTaken()) { |
338 | for (User *U : BlockAddress::get(BB: const_cast<BasicBlock *>(&BB))->users()) |
339 | if (!isa<CallBrInst>(Val: *U)) { |
340 | HasIndirBranchToBlockAddress = true; |
341 | break; |
342 | } |
343 | } |
344 | |
345 | for (const Instruction &I : BB) { |
346 | if (I.isDebugOrPseudoInst()) |
347 | continue; |
348 | ++NumInsts; |
349 | |
350 | // Regular LTO module doesn't participate in ThinLTO import, |
351 | // so no reference from it can be read/writeonly, since this |
352 | // would require importing variable as local copy |
353 | if (IsThinLTO) { |
354 | if (isNonVolatileLoad(I: &I)) { |
355 | // Postpone processing of non-volatile load instructions |
356 | // See comments below |
357 | Visited.insert(Ptr: &I); |
358 | NonVolatileLoads.push_back(x: &I); |
359 | continue; |
360 | } else if (isNonVolatileStore(I: &I)) { |
361 | Visited.insert(Ptr: &I); |
362 | NonVolatileStores.push_back(x: &I); |
363 | // All references from second operand of store (destination address) |
364 | // can be considered write-only if they're not referenced by any |
365 | // non-store instruction. References from first operand of store |
366 | // (stored value) can't be treated either as read- or as write-only |
367 | // so we add them to RefEdges as we do with all other instructions |
368 | // except non-volatile load. |
369 | Value *Stored = I.getOperand(i: 0); |
370 | if (auto *GV = dyn_cast<GlobalValue>(Val: Stored)) |
371 | // findRefEdges will try to examine GV operands, so instead |
372 | // of calling it we should add GV to RefEdges directly. |
373 | RefEdges.insert(X: Index.getOrInsertValueInfo(GV)); |
374 | else if (auto *U = dyn_cast<User>(Val: Stored)) |
375 | findRefEdges(Index, CurUser: U, RefEdges, Visited); |
376 | continue; |
377 | } |
378 | } |
379 | findRefEdges(Index, CurUser: &I, RefEdges, Visited); |
380 | const auto *CB = dyn_cast<CallBase>(Val: &I); |
381 | if (!CB) { |
382 | if (I.mayThrow()) |
383 | MayThrow = true; |
384 | continue; |
385 | } |
386 | |
387 | const auto *CI = dyn_cast<CallInst>(Val: &I); |
388 | // Since we don't know exactly which local values are referenced in inline |
389 | // assembly, conservatively mark the function as possibly referencing |
390 | // a local value from inline assembly to ensure we don't export a |
391 | // reference (which would require renaming and promotion of the |
392 | // referenced value). |
393 | if (HasLocalsInUsedOrAsm && CI && CI->isInlineAsm()) |
394 | HasInlineAsmMaybeReferencingInternal = true; |
395 | |
396 | auto *CalledValue = CB->getCalledOperand(); |
397 | auto *CalledFunction = CB->getCalledFunction(); |
398 | if (CalledValue && !CalledFunction) { |
399 | CalledValue = CalledValue->stripPointerCasts(); |
400 | // Stripping pointer casts can reveal a called function. |
401 | CalledFunction = dyn_cast<Function>(Val: CalledValue); |
402 | } |
403 | // Check if this is an alias to a function. If so, get the |
404 | // called aliasee for the checks below. |
405 | if (auto *GA = dyn_cast<GlobalAlias>(Val: CalledValue)) { |
406 | assert(!CalledFunction && "Expected null called function in callsite for alias" ); |
407 | CalledFunction = dyn_cast<Function>(Val: GA->getAliaseeObject()); |
408 | } |
409 | // Check if this is a direct call to a known function or a known |
410 | // intrinsic, or an indirect call with profile data. |
411 | if (CalledFunction) { |
412 | if (CI && CalledFunction->isIntrinsic()) { |
413 | addIntrinsicToSummary( |
414 | CI, TypeTests, TypeTestAssumeVCalls, TypeCheckedLoadVCalls, |
415 | TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls, DT); |
416 | continue; |
417 | } |
418 | // We should have named any anonymous globals |
419 | assert(CalledFunction->hasName()); |
420 | auto ScaledCount = PSI->getProfileCount(CallInst: *CB, BFI); |
421 | auto Hotness = ScaledCount ? getHotness(ProfileCount: *ScaledCount, PSI) |
422 | : CalleeInfo::HotnessType::Unknown; |
423 | if (ForceSummaryEdgesCold != FunctionSummary::FSHT_None) |
424 | Hotness = CalleeInfo::HotnessType::Cold; |
425 | |
426 | // Use the original CalledValue, in case it was an alias. We want |
427 | // to record the call edge to the alias in that case. Eventually |
428 | // an alias summary will be created to associate the alias and |
429 | // aliasee. |
430 | auto &ValueInfo = CallGraphEdges[Index.getOrInsertValueInfo( |
431 | GV: cast<GlobalValue>(Val: CalledValue))]; |
432 | ValueInfo.updateHotness(OtherHotness: Hotness); |
433 | if (CB->isTailCall()) |
434 | ValueInfo.setHasTailCall(true); |
435 | // Add the relative block frequency to CalleeInfo if there is no profile |
436 | // information. |
437 | if (BFI != nullptr && Hotness == CalleeInfo::HotnessType::Unknown) { |
438 | uint64_t BBFreq = BFI->getBlockFreq(BB: &BB).getFrequency(); |
439 | uint64_t EntryFreq = BFI->getEntryFreq().getFrequency(); |
440 | ValueInfo.updateRelBlockFreq(BlockFreq: BBFreq, EntryFreq); |
441 | } |
442 | } else { |
443 | HasUnknownCall = true; |
444 | // If F is imported, a local linkage ifunc (e.g. target_clones on a |
445 | // static function) called by F will be cloned. Since summaries don't |
446 | // track ifunc, we do not know implementation functions referenced by |
447 | // the ifunc resolver need to be promoted in the exporter, and we will |
448 | // get linker errors due to cloned declarations for implementation |
449 | // functions. As a simple fix, just mark F as not eligible for import. |
450 | // Non-local ifunc is not cloned and does not have the issue. |
451 | if (auto *GI = dyn_cast_if_present<GlobalIFunc>(Val: CalledValue)) |
452 | if (GI->hasLocalLinkage()) |
453 | HasIFuncCall = true; |
454 | // Skip inline assembly calls. |
455 | if (CI && CI->isInlineAsm()) |
456 | continue; |
457 | // Skip direct calls. |
458 | if (!CalledValue || isa<Constant>(Val: CalledValue)) |
459 | continue; |
460 | |
461 | // Check if the instruction has a callees metadata. If so, add callees |
462 | // to CallGraphEdges to reflect the references from the metadata, and |
463 | // to enable importing for subsequent indirect call promotion and |
464 | // inlining. |
465 | if (auto *MD = I.getMetadata(KindID: LLVMContext::MD_callees)) { |
466 | for (const auto &Op : MD->operands()) { |
467 | Function *Callee = mdconst::extract_or_null<Function>(MD: Op); |
468 | if (Callee) |
469 | CallGraphEdges[Index.getOrInsertValueInfo(GV: Callee)]; |
470 | } |
471 | } |
472 | |
473 | uint32_t NumVals, NumCandidates; |
474 | uint64_t TotalCount; |
475 | auto CandidateProfileData = |
476 | ICallAnalysis.getPromotionCandidatesForInstruction( |
477 | I: &I, NumVals, TotalCount, NumCandidates); |
478 | for (const auto &Candidate : CandidateProfileData) |
479 | CallGraphEdges[Index.getOrInsertValueInfo(GUID: Candidate.Value)] |
480 | .updateHotness(OtherHotness: getHotness(ProfileCount: Candidate.Count, PSI)); |
481 | } |
482 | |
483 | // Summarize memprof related metadata. This is only needed for ThinLTO. |
484 | if (!IsThinLTO) |
485 | continue; |
486 | |
487 | // TODO: Skip indirect calls for now. Need to handle these better, likely |
488 | // by creating multiple Callsites, one per target, then speculatively |
489 | // devirtualize while applying clone info in the ThinLTO backends. This |
490 | // will also be important because we will have a different set of clone |
491 | // versions per target. This handling needs to match that in the ThinLTO |
492 | // backend so we handle things consistently for matching of callsite |
493 | // summaries to instructions. |
494 | if (!CalledFunction) |
495 | continue; |
496 | |
497 | // Ensure we keep this analysis in sync with the handling in the ThinLTO |
498 | // backend (see MemProfContextDisambiguation::applyImport). Save this call |
499 | // so that we can skip it in checking the reverse case later. |
500 | assert(mayHaveMemprofSummary(CB)); |
501 | #ifndef NDEBUG |
502 | CallsThatMayHaveMemprofSummary.insert(V: CB); |
503 | #endif |
504 | |
505 | // Compute the list of stack ids first (so we can trim them from the stack |
506 | // ids on any MIBs). |
507 | CallStack<MDNode, MDNode::op_iterator> InstCallsite( |
508 | I.getMetadata(KindID: LLVMContext::MD_callsite)); |
509 | auto *MemProfMD = I.getMetadata(KindID: LLVMContext::MD_memprof); |
510 | if (MemProfMD) { |
511 | std::vector<MIBInfo> MIBs; |
512 | for (auto &MDOp : MemProfMD->operands()) { |
513 | auto *MIBMD = cast<const MDNode>(Val: MDOp); |
514 | MDNode *StackNode = getMIBStackNode(MIB: MIBMD); |
515 | assert(StackNode); |
516 | SmallVector<unsigned> StackIdIndices; |
517 | CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode); |
518 | // Collapse out any on the allocation call (inlining). |
519 | for (auto ContextIter = |
520 | StackContext.beginAfterSharedPrefix(Other&: InstCallsite); |
521 | ContextIter != StackContext.end(); ++ContextIter) { |
522 | unsigned StackIdIdx = Index.addOrGetStackIdIndex(StackId: *ContextIter); |
523 | // If this is a direct recursion, simply skip the duplicate |
524 | // entries. If this is mutual recursion, handling is left to |
525 | // the LTO link analysis client. |
526 | if (StackIdIndices.empty() || StackIdIndices.back() != StackIdIdx) |
527 | StackIdIndices.push_back(Elt: StackIdIdx); |
528 | } |
529 | MIBs.push_back( |
530 | x: MIBInfo(getMIBAllocType(MIB: MIBMD), std::move(StackIdIndices))); |
531 | } |
532 | Allocs.push_back(x: AllocInfo(std::move(MIBs))); |
533 | } else if (!InstCallsite.empty()) { |
534 | SmallVector<unsigned> StackIdIndices; |
535 | for (auto StackId : InstCallsite) |
536 | StackIdIndices.push_back(Elt: Index.addOrGetStackIdIndex(StackId)); |
537 | // Use the original CalledValue, in case it was an alias. We want |
538 | // to record the call edge to the alias in that case. Eventually |
539 | // an alias summary will be created to associate the alias and |
540 | // aliasee. |
541 | auto CalleeValueInfo = |
542 | Index.getOrInsertValueInfo(GV: cast<GlobalValue>(Val: CalledValue)); |
543 | Callsites.push_back(x: {CalleeValueInfo, StackIdIndices}); |
544 | } |
545 | } |
546 | } |
547 | |
548 | if (PSI->hasPartialSampleProfile() && ScalePartialSampleProfileWorkingSetSize) |
549 | Index.addBlockCount(C: F.size()); |
550 | |
551 | std::vector<ValueInfo> Refs; |
552 | if (IsThinLTO) { |
553 | auto AddRefEdges = [&](const std::vector<const Instruction *> &Instrs, |
554 | SetVector<ValueInfo, std::vector<ValueInfo>> &Edges, |
555 | SmallPtrSet<const User *, 8> &Cache) { |
556 | for (const auto *I : Instrs) { |
557 | Cache.erase(Ptr: I); |
558 | findRefEdges(Index, CurUser: I, RefEdges&: Edges, Visited&: Cache); |
559 | } |
560 | }; |
561 | |
562 | // By now we processed all instructions in a function, except |
563 | // non-volatile loads and non-volatile value stores. Let's find |
564 | // ref edges for both of instruction sets |
565 | AddRefEdges(NonVolatileLoads, LoadRefEdges, Visited); |
566 | // We can add some values to the Visited set when processing load |
567 | // instructions which are also used by stores in NonVolatileStores. |
568 | // For example this can happen if we have following code: |
569 | // |
570 | // store %Derived* @foo, %Derived** bitcast (%Base** @bar to %Derived**) |
571 | // %42 = load %Derived*, %Derived** bitcast (%Base** @bar to %Derived**) |
572 | // |
573 | // After processing loads we'll add bitcast to the Visited set, and if |
574 | // we use the same set while processing stores, we'll never see store |
575 | // to @bar and @bar will be mistakenly treated as readonly. |
576 | SmallPtrSet<const llvm::User *, 8> StoreCache; |
577 | AddRefEdges(NonVolatileStores, StoreRefEdges, StoreCache); |
578 | |
579 | // If both load and store instruction reference the same variable |
580 | // we won't be able to optimize it. Add all such reference edges |
581 | // to RefEdges set. |
582 | for (const auto &VI : StoreRefEdges) |
583 | if (LoadRefEdges.remove(X: VI)) |
584 | RefEdges.insert(X: VI); |
585 | |
586 | unsigned RefCnt = RefEdges.size(); |
587 | // All new reference edges inserted in two loops below are either |
588 | // read or write only. They will be grouped in the end of RefEdges |
589 | // vector, so we can use a single integer value to identify them. |
590 | for (const auto &VI : LoadRefEdges) |
591 | RefEdges.insert(X: VI); |
592 | |
593 | unsigned FirstWORef = RefEdges.size(); |
594 | for (const auto &VI : StoreRefEdges) |
595 | RefEdges.insert(X: VI); |
596 | |
597 | Refs = RefEdges.takeVector(); |
598 | for (; RefCnt < FirstWORef; ++RefCnt) |
599 | Refs[RefCnt].setReadOnly(); |
600 | |
601 | for (; RefCnt < Refs.size(); ++RefCnt) |
602 | Refs[RefCnt].setWriteOnly(); |
603 | } else { |
604 | Refs = RefEdges.takeVector(); |
605 | } |
606 | // Explicit add hot edges to enforce importing for designated GUIDs for |
607 | // sample PGO, to enable the same inlines as the profiled optimized binary. |
608 | for (auto &I : F.getImportGUIDs()) |
609 | CallGraphEdges[Index.getOrInsertValueInfo(GUID: I)].updateHotness( |
610 | OtherHotness: ForceSummaryEdgesCold == FunctionSummary::FSHT_All |
611 | ? CalleeInfo::HotnessType::Cold |
612 | : CalleeInfo::HotnessType::Critical); |
613 | |
614 | #ifndef NDEBUG |
615 | // Make sure that all calls we decided could not have memprof summaries get a |
616 | // false value for mayHaveMemprofSummary, to ensure that this handling remains |
617 | // in sync with the ThinLTO backend handling. |
618 | if (IsThinLTO) { |
619 | for (const BasicBlock &BB : F) { |
620 | for (const Instruction &I : BB) { |
621 | const auto *CB = dyn_cast<CallBase>(Val: &I); |
622 | if (!CB) |
623 | continue; |
624 | // We already checked these above. |
625 | if (CallsThatMayHaveMemprofSummary.count(V: CB)) |
626 | continue; |
627 | assert(!mayHaveMemprofSummary(CB)); |
628 | } |
629 | } |
630 | } |
631 | #endif |
632 | |
633 | bool NonRenamableLocal = isNonRenamableLocal(GV: F); |
634 | bool NotEligibleForImport = NonRenamableLocal || |
635 | HasInlineAsmMaybeReferencingInternal || |
636 | HasIndirBranchToBlockAddress || HasIFuncCall; |
637 | GlobalValueSummary::GVFlags Flags( |
638 | F.getLinkage(), F.getVisibility(), NotEligibleForImport, |
639 | /* Live = */ false, F.isDSOLocal(), F.canBeOmittedFromSymbolTable(), |
640 | GlobalValueSummary::ImportKind::Definition); |
641 | FunctionSummary::FFlags FunFlags{ |
642 | F.doesNotAccessMemory(), F.onlyReadsMemory() && !F.doesNotAccessMemory(), |
643 | F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(), |
644 | // FIXME: refactor this to use the same code that inliner is using. |
645 | // Don't try to import functions with noinline attribute. |
646 | F.getAttributes().hasFnAttr(Attribute::NoInline), |
647 | F.hasFnAttribute(Attribute::AlwaysInline), |
648 | F.hasFnAttribute(Attribute::NoUnwind), MayThrow, HasUnknownCall, |
649 | mustBeUnreachableFunction(F)}; |
650 | std::vector<FunctionSummary::ParamAccess> ParamAccesses; |
651 | if (auto *SSI = GetSSICallback(F)) |
652 | ParamAccesses = SSI->getParamAccesses(Index); |
653 | auto FuncSummary = std::make_unique<FunctionSummary>( |
654 | args&: Flags, args&: NumInsts, args&: FunFlags, /*EntryCount=*/args: 0, args: std::move(Refs), |
655 | args: CallGraphEdges.takeVector(), args: TypeTests.takeVector(), |
656 | args: TypeTestAssumeVCalls.takeVector(), args: TypeCheckedLoadVCalls.takeVector(), |
657 | args: TypeTestAssumeConstVCalls.takeVector(), |
658 | args: TypeCheckedLoadConstVCalls.takeVector(), args: std::move(ParamAccesses), |
659 | args: std::move(Callsites), args: std::move(Allocs)); |
660 | if (NonRenamableLocal) |
661 | CantBePromoted.insert(V: F.getGUID()); |
662 | Index.addGlobalValueSummary(GV: F, Summary: std::move(FuncSummary)); |
663 | } |
664 | |
665 | /// Find function pointers referenced within the given vtable initializer |
666 | /// (or subset of an initializer) \p I. The starting offset of \p I within |
667 | /// the vtable initializer is \p StartingOffset. Any discovered function |
668 | /// pointers are added to \p VTableFuncs along with their cumulative offset |
669 | /// within the initializer. |
670 | static void findFuncPointers(const Constant *I, uint64_t StartingOffset, |
671 | const Module &M, ModuleSummaryIndex &Index, |
672 | VTableFuncList &VTableFuncs, |
673 | const GlobalVariable &OrigGV) { |
674 | // First check if this is a function pointer. |
675 | if (I->getType()->isPointerTy()) { |
676 | auto C = I->stripPointerCasts(); |
677 | auto A = dyn_cast<GlobalAlias>(Val: C); |
678 | if (isa<Function>(Val: C) || (A && isa<Function>(Val: A->getAliasee()))) { |
679 | auto GV = dyn_cast<GlobalValue>(Val: C); |
680 | assert(GV); |
681 | // We can disregard __cxa_pure_virtual as a possible call target, as |
682 | // calls to pure virtuals are UB. |
683 | if (GV && GV->getName() != "__cxa_pure_virtual" ) |
684 | VTableFuncs.push_back(x: {Index.getOrInsertValueInfo(GV), StartingOffset}); |
685 | return; |
686 | } |
687 | } |
688 | |
689 | // Walk through the elements in the constant struct or array and recursively |
690 | // look for virtual function pointers. |
691 | const DataLayout &DL = M.getDataLayout(); |
692 | if (auto *C = dyn_cast<ConstantStruct>(Val: I)) { |
693 | StructType *STy = dyn_cast<StructType>(Val: C->getType()); |
694 | assert(STy); |
695 | const StructLayout *SL = DL.getStructLayout(Ty: C->getType()); |
696 | |
697 | for (auto EI : llvm::enumerate(First: STy->elements())) { |
698 | auto Offset = SL->getElementOffset(Idx: EI.index()); |
699 | unsigned Op = SL->getElementContainingOffset(FixedOffset: Offset); |
700 | findFuncPointers(I: cast<Constant>(Val: I->getOperand(i: Op)), |
701 | StartingOffset: StartingOffset + Offset, M, Index, VTableFuncs, OrigGV); |
702 | } |
703 | } else if (auto *C = dyn_cast<ConstantArray>(Val: I)) { |
704 | ArrayType *ATy = C->getType(); |
705 | Type *EltTy = ATy->getElementType(); |
706 | uint64_t EltSize = DL.getTypeAllocSize(Ty: EltTy); |
707 | for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { |
708 | findFuncPointers(I: cast<Constant>(Val: I->getOperand(i)), |
709 | StartingOffset: StartingOffset + i * EltSize, M, Index, VTableFuncs, |
710 | OrigGV); |
711 | } |
712 | } else if (const auto *CE = dyn_cast<ConstantExpr>(Val: I)) { |
713 | // For relative vtables, the next sub-component should be a trunc. |
714 | if (CE->getOpcode() != Instruction::Trunc || |
715 | !(CE = dyn_cast<ConstantExpr>(Val: CE->getOperand(i_nocapture: 0)))) |
716 | return; |
717 | |
718 | // If this constant can be reduced to the offset between a function and a |
719 | // global, then we know this is a valid virtual function if the RHS is the |
720 | // original vtable we're scanning through. |
721 | if (CE->getOpcode() == Instruction::Sub) { |
722 | GlobalValue *LHS, *RHS; |
723 | APSInt LHSOffset, RHSOffset; |
724 | if (IsConstantOffsetFromGlobal(C: CE->getOperand(i_nocapture: 0), GV&: LHS, Offset&: LHSOffset, DL) && |
725 | IsConstantOffsetFromGlobal(C: CE->getOperand(i_nocapture: 1), GV&: RHS, Offset&: RHSOffset, DL) && |
726 | RHS == &OrigGV && |
727 | |
728 | // For relative vtables, this component should point to the callable |
729 | // function without any offsets. |
730 | LHSOffset == 0 && |
731 | |
732 | // Also, the RHS should always point to somewhere within the vtable. |
733 | RHSOffset <= |
734 | static_cast<uint64_t>(DL.getTypeAllocSize(Ty: OrigGV.getInitializer()->getType()))) { |
735 | findFuncPointers(I: LHS, StartingOffset, M, Index, VTableFuncs, OrigGV); |
736 | } |
737 | } |
738 | } |
739 | } |
740 | |
741 | // Identify the function pointers referenced by vtable definition \p V. |
742 | static void computeVTableFuncs(ModuleSummaryIndex &Index, |
743 | const GlobalVariable &V, const Module &M, |
744 | VTableFuncList &VTableFuncs) { |
745 | if (!V.isConstant()) |
746 | return; |
747 | |
748 | findFuncPointers(I: V.getInitializer(), /*StartingOffset=*/0, M, Index, |
749 | VTableFuncs, OrigGV: V); |
750 | |
751 | #ifndef NDEBUG |
752 | // Validate that the VTableFuncs list is ordered by offset. |
753 | uint64_t PrevOffset = 0; |
754 | for (auto &P : VTableFuncs) { |
755 | // The findVFuncPointers traversal should have encountered the |
756 | // functions in offset order. We need to use ">=" since PrevOffset |
757 | // starts at 0. |
758 | assert(P.VTableOffset >= PrevOffset); |
759 | PrevOffset = P.VTableOffset; |
760 | } |
761 | #endif |
762 | } |
763 | |
764 | /// Record vtable definition \p V for each type metadata it references. |
765 | static void |
766 | recordTypeIdCompatibleVtableReferences(ModuleSummaryIndex &Index, |
767 | const GlobalVariable &V, |
768 | SmallVectorImpl<MDNode *> &Types) { |
769 | for (MDNode *Type : Types) { |
770 | auto TypeID = Type->getOperand(I: 1).get(); |
771 | |
772 | uint64_t Offset = |
773 | cast<ConstantInt>( |
774 | Val: cast<ConstantAsMetadata>(Val: Type->getOperand(I: 0))->getValue()) |
775 | ->getZExtValue(); |
776 | |
777 | if (auto *TypeId = dyn_cast<MDString>(Val: TypeID)) |
778 | Index.getOrInsertTypeIdCompatibleVtableSummary(TypeId: TypeId->getString()) |
779 | .push_back(x: {Offset, Index.getOrInsertValueInfo(GV: &V)}); |
780 | } |
781 | } |
782 | |
783 | static void computeVariableSummary(ModuleSummaryIndex &Index, |
784 | const GlobalVariable &V, |
785 | DenseSet<GlobalValue::GUID> &CantBePromoted, |
786 | const Module &M, |
787 | SmallVectorImpl<MDNode *> &Types) { |
788 | SetVector<ValueInfo, std::vector<ValueInfo>> RefEdges; |
789 | SmallPtrSet<const User *, 8> Visited; |
790 | bool HasBlockAddress = findRefEdges(Index, CurUser: &V, RefEdges, Visited); |
791 | bool NonRenamableLocal = isNonRenamableLocal(GV: V); |
792 | GlobalValueSummary::GVFlags Flags( |
793 | V.getLinkage(), V.getVisibility(), NonRenamableLocal, |
794 | /* Live = */ false, V.isDSOLocal(), V.canBeOmittedFromSymbolTable(), |
795 | GlobalValueSummary::Definition); |
796 | |
797 | VTableFuncList VTableFuncs; |
798 | // If splitting is not enabled, then we compute the summary information |
799 | // necessary for index-based whole program devirtualization. |
800 | if (!Index.enableSplitLTOUnit()) { |
801 | Types.clear(); |
802 | V.getMetadata(KindID: LLVMContext::MD_type, MDs&: Types); |
803 | if (!Types.empty()) { |
804 | // Identify the function pointers referenced by this vtable definition. |
805 | computeVTableFuncs(Index, V, M, VTableFuncs); |
806 | |
807 | // Record this vtable definition for each type metadata it references. |
808 | recordTypeIdCompatibleVtableReferences(Index, V, Types); |
809 | } |
810 | } |
811 | |
812 | // Don't mark variables we won't be able to internalize as read/write-only. |
813 | bool CanBeInternalized = |
814 | !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() && |
815 | !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass(); |
816 | bool Constant = V.isConstant(); |
817 | GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, |
818 | Constant ? false : CanBeInternalized, |
819 | Constant, V.getVCallVisibility()); |
820 | auto GVarSummary = std::make_unique<GlobalVarSummary>(args&: Flags, args&: VarFlags, |
821 | args: RefEdges.takeVector()); |
822 | if (NonRenamableLocal) |
823 | CantBePromoted.insert(V: V.getGUID()); |
824 | if (HasBlockAddress) |
825 | GVarSummary->setNotEligibleToImport(); |
826 | if (!VTableFuncs.empty()) |
827 | GVarSummary->setVTableFuncs(VTableFuncs); |
828 | Index.addGlobalValueSummary(GV: V, Summary: std::move(GVarSummary)); |
829 | } |
830 | |
831 | static void computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, |
832 | DenseSet<GlobalValue::GUID> &CantBePromoted) { |
833 | // Skip summary for indirect function aliases as summary for aliasee will not |
834 | // be emitted. |
835 | const GlobalObject *Aliasee = A.getAliaseeObject(); |
836 | if (isa<GlobalIFunc>(Val: Aliasee)) |
837 | return; |
838 | bool NonRenamableLocal = isNonRenamableLocal(GV: A); |
839 | GlobalValueSummary::GVFlags Flags( |
840 | A.getLinkage(), A.getVisibility(), NonRenamableLocal, |
841 | /* Live = */ false, A.isDSOLocal(), A.canBeOmittedFromSymbolTable(), |
842 | GlobalValueSummary::Definition); |
843 | auto AS = std::make_unique<AliasSummary>(args&: Flags); |
844 | auto AliaseeVI = Index.getValueInfo(GUID: Aliasee->getGUID()); |
845 | assert(AliaseeVI && "Alias expects aliasee summary to be available" ); |
846 | assert(AliaseeVI.getSummaryList().size() == 1 && |
847 | "Expected a single entry per aliasee in per-module index" ); |
848 | AS->setAliasee(AliaseeVI, Aliasee: AliaseeVI.getSummaryList()[0].get()); |
849 | if (NonRenamableLocal) |
850 | CantBePromoted.insert(V: A.getGUID()); |
851 | Index.addGlobalValueSummary(GV: A, Summary: std::move(AS)); |
852 | } |
853 | |
854 | // Set LiveRoot flag on entries matching the given value name. |
855 | static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) { |
856 | if (ValueInfo VI = Index.getValueInfo(GUID: GlobalValue::getGUID(GlobalName: Name))) |
857 | for (const auto &Summary : VI.getSummaryList()) |
858 | Summary->setLive(true); |
859 | } |
860 | |
861 | ModuleSummaryIndex llvm::buildModuleSummaryIndex( |
862 | const Module &M, |
863 | std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback, |
864 | ProfileSummaryInfo *PSI, |
865 | std::function<const StackSafetyInfo *(const Function &F)> GetSSICallback) { |
866 | assert(PSI); |
867 | bool EnableSplitLTOUnit = false; |
868 | bool UnifiedLTO = false; |
869 | if (auto *MD = mdconst::extract_or_null<ConstantInt>( |
870 | MD: M.getModuleFlag(Key: "EnableSplitLTOUnit" ))) |
871 | EnableSplitLTOUnit = MD->getZExtValue(); |
872 | if (auto *MD = |
873 | mdconst::extract_or_null<ConstantInt>(MD: M.getModuleFlag(Key: "UnifiedLTO" ))) |
874 | UnifiedLTO = MD->getZExtValue(); |
875 | ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit, UnifiedLTO); |
876 | |
877 | // Identify the local values in the llvm.used and llvm.compiler.used sets, |
878 | // which should not be exported as they would then require renaming and |
879 | // promotion, but we may have opaque uses e.g. in inline asm. We collect them |
880 | // here because we use this information to mark functions containing inline |
881 | // assembly calls as not importable. |
882 | SmallPtrSet<GlobalValue *, 4> LocalsUsed; |
883 | SmallVector<GlobalValue *, 4> Used; |
884 | // First collect those in the llvm.used set. |
885 | collectUsedGlobalVariables(M, Vec&: Used, /*CompilerUsed=*/false); |
886 | // Next collect those in the llvm.compiler.used set. |
887 | collectUsedGlobalVariables(M, Vec&: Used, /*CompilerUsed=*/true); |
888 | DenseSet<GlobalValue::GUID> CantBePromoted; |
889 | for (auto *V : Used) { |
890 | if (V->hasLocalLinkage()) { |
891 | LocalsUsed.insert(Ptr: V); |
892 | CantBePromoted.insert(V: V->getGUID()); |
893 | } |
894 | } |
895 | |
896 | bool HasLocalInlineAsmSymbol = false; |
897 | if (!M.getModuleInlineAsm().empty()) { |
898 | // Collect the local values defined by module level asm, and set up |
899 | // summaries for these symbols so that they can be marked as NoRename, |
900 | // to prevent export of any use of them in regular IR that would require |
901 | // renaming within the module level asm. Note we don't need to create a |
902 | // summary for weak or global defs, as they don't need to be flagged as |
903 | // NoRename, and defs in module level asm can't be imported anyway. |
904 | // Also, any values used but not defined within module level asm should |
905 | // be listed on the llvm.used or llvm.compiler.used global and marked as |
906 | // referenced from there. |
907 | ModuleSymbolTable::CollectAsmSymbols( |
908 | M, AsmSymbol: [&](StringRef Name, object::BasicSymbolRef::Flags Flags) { |
909 | // Symbols not marked as Weak or Global are local definitions. |
910 | if (Flags & (object::BasicSymbolRef::SF_Weak | |
911 | object::BasicSymbolRef::SF_Global)) |
912 | return; |
913 | HasLocalInlineAsmSymbol = true; |
914 | GlobalValue *GV = M.getNamedValue(Name); |
915 | if (!GV) |
916 | return; |
917 | assert(GV->isDeclaration() && "Def in module asm already has definition" ); |
918 | GlobalValueSummary::GVFlags GVFlags( |
919 | GlobalValue::InternalLinkage, GlobalValue::DefaultVisibility, |
920 | /* NotEligibleToImport = */ true, |
921 | /* Live = */ true, |
922 | /* Local */ GV->isDSOLocal(), GV->canBeOmittedFromSymbolTable(), |
923 | GlobalValueSummary::Definition); |
924 | CantBePromoted.insert(V: GV->getGUID()); |
925 | // Create the appropriate summary type. |
926 | if (Function *F = dyn_cast<Function>(Val: GV)) { |
927 | std::unique_ptr<FunctionSummary> Summary = |
928 | std::make_unique<FunctionSummary>( |
929 | GVFlags, /*InstCount=*/0, |
930 | FunctionSummary::FFlags{ |
931 | F->hasFnAttribute(Attribute::ReadNone), |
932 | F->hasFnAttribute(Attribute::ReadOnly), |
933 | F->hasFnAttribute(Attribute::NoRecurse), |
934 | F->returnDoesNotAlias(), |
935 | /* NoInline = */ false, |
936 | F->hasFnAttribute(Attribute::AlwaysInline), |
937 | F->hasFnAttribute(Attribute::NoUnwind), |
938 | /* MayThrow */ true, |
939 | /* HasUnknownCall */ true, |
940 | /* MustBeUnreachable */ false}, |
941 | /*EntryCount=*/0, ArrayRef<ValueInfo>{}, |
942 | ArrayRef<FunctionSummary::EdgeTy>{}, |
943 | ArrayRef<GlobalValue::GUID>{}, |
944 | ArrayRef<FunctionSummary::VFuncId>{}, |
945 | ArrayRef<FunctionSummary::VFuncId>{}, |
946 | ArrayRef<FunctionSummary::ConstVCall>{}, |
947 | ArrayRef<FunctionSummary::ConstVCall>{}, |
948 | ArrayRef<FunctionSummary::ParamAccess>{}, |
949 | ArrayRef<CallsiteInfo>{}, ArrayRef<AllocInfo>{}); |
950 | Index.addGlobalValueSummary(GV: *GV, Summary: std::move(Summary)); |
951 | } else { |
952 | std::unique_ptr<GlobalVarSummary> Summary = |
953 | std::make_unique<GlobalVarSummary>( |
954 | args&: GVFlags, |
955 | args: GlobalVarSummary::GVarFlags( |
956 | false, false, cast<GlobalVariable>(Val: GV)->isConstant(), |
957 | GlobalObject::VCallVisibilityPublic), |
958 | args: ArrayRef<ValueInfo>{}); |
959 | Index.addGlobalValueSummary(GV: *GV, Summary: std::move(Summary)); |
960 | } |
961 | }); |
962 | } |
963 | |
964 | bool IsThinLTO = true; |
965 | if (auto *MD = |
966 | mdconst::extract_or_null<ConstantInt>(MD: M.getModuleFlag(Key: "ThinLTO" ))) |
967 | IsThinLTO = MD->getZExtValue(); |
968 | |
969 | // Compute summaries for all functions defined in module, and save in the |
970 | // index. |
971 | for (const auto &F : M) { |
972 | if (F.isDeclaration()) |
973 | continue; |
974 | |
975 | DominatorTree DT(const_cast<Function &>(F)); |
976 | BlockFrequencyInfo *BFI = nullptr; |
977 | std::unique_ptr<BlockFrequencyInfo> BFIPtr; |
978 | if (GetBFICallback) |
979 | BFI = GetBFICallback(F); |
980 | else if (F.hasProfileData()) { |
981 | LoopInfo LI{DT}; |
982 | BranchProbabilityInfo BPI{F, LI}; |
983 | BFIPtr = std::make_unique<BlockFrequencyInfo>(args: F, args&: BPI, args&: LI); |
984 | BFI = BFIPtr.get(); |
985 | } |
986 | |
987 | computeFunctionSummary(Index, M, F, BFI, PSI, DT, |
988 | HasLocalsInUsedOrAsm: !LocalsUsed.empty() || HasLocalInlineAsmSymbol, |
989 | CantBePromoted, IsThinLTO, GetSSICallback); |
990 | } |
991 | |
992 | // Compute summaries for all variables defined in module, and save in the |
993 | // index. |
994 | SmallVector<MDNode *, 2> Types; |
995 | for (const GlobalVariable &G : M.globals()) { |
996 | if (G.isDeclaration()) |
997 | continue; |
998 | computeVariableSummary(Index, V: G, CantBePromoted, M, Types); |
999 | } |
1000 | |
1001 | // Compute summaries for all aliases defined in module, and save in the |
1002 | // index. |
1003 | for (const GlobalAlias &A : M.aliases()) |
1004 | computeAliasSummary(Index, A, CantBePromoted); |
1005 | |
1006 | // Iterate through ifuncs, set their resolvers all alive. |
1007 | for (const GlobalIFunc &I : M.ifuncs()) { |
1008 | I.applyAlongResolverPath(Op: [&Index](const GlobalValue &GV) { |
1009 | Index.getGlobalValueSummary(GV)->setLive(true); |
1010 | }); |
1011 | } |
1012 | |
1013 | for (auto *V : LocalsUsed) { |
1014 | auto *Summary = Index.getGlobalValueSummary(GV: *V); |
1015 | assert(Summary && "Missing summary for global value" ); |
1016 | Summary->setNotEligibleToImport(); |
1017 | } |
1018 | |
1019 | // The linker doesn't know about these LLVM produced values, so we need |
1020 | // to flag them as live in the index to ensure index-based dead value |
1021 | // analysis treats them as live roots of the analysis. |
1022 | setLiveRoot(Index, Name: "llvm.used" ); |
1023 | setLiveRoot(Index, Name: "llvm.compiler.used" ); |
1024 | setLiveRoot(Index, Name: "llvm.global_ctors" ); |
1025 | setLiveRoot(Index, Name: "llvm.global_dtors" ); |
1026 | setLiveRoot(Index, Name: "llvm.global.annotations" ); |
1027 | |
1028 | for (auto &GlobalList : Index) { |
1029 | // Ignore entries for references that are undefined in the current module. |
1030 | if (GlobalList.second.SummaryList.empty()) |
1031 | continue; |
1032 | |
1033 | assert(GlobalList.second.SummaryList.size() == 1 && |
1034 | "Expected module's index to have one summary per GUID" ); |
1035 | auto &Summary = GlobalList.second.SummaryList[0]; |
1036 | if (!IsThinLTO) { |
1037 | Summary->setNotEligibleToImport(); |
1038 | continue; |
1039 | } |
1040 | |
1041 | bool AllRefsCanBeExternallyReferenced = |
1042 | llvm::all_of(Range: Summary->refs(), P: [&](const ValueInfo &VI) { |
1043 | return !CantBePromoted.count(V: VI.getGUID()); |
1044 | }); |
1045 | if (!AllRefsCanBeExternallyReferenced) { |
1046 | Summary->setNotEligibleToImport(); |
1047 | continue; |
1048 | } |
1049 | |
1050 | if (auto *FuncSummary = dyn_cast<FunctionSummary>(Val: Summary.get())) { |
1051 | bool AllCallsCanBeExternallyReferenced = llvm::all_of( |
1052 | Range: FuncSummary->calls(), P: [&](const FunctionSummary::EdgeTy &Edge) { |
1053 | return !CantBePromoted.count(V: Edge.first.getGUID()); |
1054 | }); |
1055 | if (!AllCallsCanBeExternallyReferenced) |
1056 | Summary->setNotEligibleToImport(); |
1057 | } |
1058 | } |
1059 | |
1060 | if (!ModuleSummaryDotFile.empty()) { |
1061 | std::error_code EC; |
1062 | raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::OF_None); |
1063 | if (EC) |
1064 | report_fatal_error(reason: Twine("Failed to open dot file " ) + |
1065 | ModuleSummaryDotFile + ": " + EC.message() + "\n" ); |
1066 | Index.exportToDot(OS&: OSDot, GUIDPreservedSymbols: {}); |
1067 | } |
1068 | |
1069 | return Index; |
1070 | } |
1071 | |
1072 | AnalysisKey ModuleSummaryIndexAnalysis::Key; |
1073 | |
1074 | ModuleSummaryIndex |
1075 | ModuleSummaryIndexAnalysis::run(Module &M, ModuleAnalysisManager &AM) { |
1076 | ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(IR&: M); |
1077 | auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
1078 | bool NeedSSI = needsParamAccessSummary(M); |
1079 | return buildModuleSummaryIndex( |
1080 | M, |
1081 | GetBFICallback: [&FAM](const Function &F) { |
1082 | return &FAM.getResult<BlockFrequencyAnalysis>( |
1083 | IR&: *const_cast<Function *>(&F)); |
1084 | }, |
1085 | PSI: &PSI, |
1086 | GetSSICallback: [&FAM, NeedSSI](const Function &F) -> const StackSafetyInfo * { |
1087 | return NeedSSI ? &FAM.getResult<StackSafetyAnalysis>( |
1088 | IR&: const_cast<Function &>(F)) |
1089 | : nullptr; |
1090 | }); |
1091 | } |
1092 | |
// Static ID member of the legacy wrapper pass; it is handed to the ModulePass
// base-class constructor by ModuleSummaryIndexWrapperPass's constructor.
char ModuleSummaryIndexWrapperPass::ID = 0;

// Legacy pass registration for ModuleSummaryIndexWrapperPass under the
// argument string "module-summary-analysis" with the description
// "Module Summary Analysis". The three wrapper passes listed between BEGIN and
// END are declared as initialization dependencies.
// NOTE(review): the trailing `false, true` are presumably the macro's
// "CFG only" and "is analysis" arguments -- confirm against PassSupport.h.
INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis" ,
"Module Summary Analysis" , false, true)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackSafetyInfoWrapperPass)
INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis" ,
"Module Summary Analysis" , false, true)
1102 | |
/// Allocate a new ModuleSummaryIndexWrapperPass with `new` and return it as a
/// ModulePass pointer. The returned object is not freed here; presumably the
/// caller (e.g. a legacy pass manager) takes ownership -- confirm at call
/// sites.
ModulePass *llvm::createModuleSummaryIndexWrapperPass() {
return new ModuleSummaryIndexWrapperPass();
}
1106 | |
1107 | ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass() |
1108 | : ModulePass(ID) { |
1109 | initializeModuleSummaryIndexWrapperPassPass(Registry&: *PassRegistry::getPassRegistry()); |
1110 | } |
1111 | |
1112 | bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) { |
1113 | auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
1114 | bool NeedSSI = needsParamAccessSummary(M); |
1115 | Index.emplace(args: buildModuleSummaryIndex( |
1116 | M, |
1117 | GetBFICallback: [this](const Function &F) { |
1118 | return &(this->getAnalysis<BlockFrequencyInfoWrapperPass>( |
1119 | F&: *const_cast<Function *>(&F)) |
1120 | .getBFI()); |
1121 | }, |
1122 | PSI, |
1123 | GetSSICallback: [&](const Function &F) -> const StackSafetyInfo * { |
1124 | return NeedSSI ? &getAnalysis<StackSafetyInfoWrapperPass>( |
1125 | F&: const_cast<Function &>(F)) |
1126 | .getResult() |
1127 | : nullptr; |
1128 | })); |
1129 | return false; |
1130 | } |
1131 | |
/// Discard the index stored by runOnModule by calling reset() on \c Index.
/// \p M is not used. Always returns false.
bool ModuleSummaryIndexWrapperPass::doFinalization(Module &M) {
// Release the stored summary index.
Index.reset();
return false;
}
1136 | |
/// Describe this pass's analysis usage in \p AU: it preserves all analyses
/// and requires the block frequency, profile summary and stack safety wrapper
/// passes (the same three that runOnModule queries through getAnalysis).
void ModuleSummaryIndexWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<StackSafetyInfoWrapperPass>();
}
1143 | |
// Static ID member of the immutable wrapper pass; passed to the ImmutablePass
// base-class constructor below.
char ImmutableModuleSummaryIndexWrapperPass::ID = 0;

/// Construct the immutable wrapper around an existing index.
///
/// \param Index Pointer stored as-is in the \c Index member; the index is not
///        copied here. NOTE(review): presumably the caller keeps ownership and
///        must keep it alive while the pass is in use -- confirm.
ImmutableModuleSummaryIndexWrapperPass::ImmutableModuleSummaryIndexWrapperPass(
const ModuleSummaryIndex *Index)
: ImmutablePass(ID), Index(Index) {
// Run this pass's initialization routine against the registry returned by
// PassRegistry::getPassRegistry().
initializeImmutableModuleSummaryIndexWrapperPassPass(
*PassRegistry::getPassRegistry());
}
1152 | |
/// Describe this pass's analysis usage in \p AU: it preserves all analyses
/// and declares no required passes.
void ImmutableModuleSummaryIndexWrapperPass::getAnalysisUsage(
AnalysisUsage &AU) const {
AU.setPreservesAll();
}
1157 | |
/// Allocate a new ImmutableModuleSummaryIndexWrapperPass wrapping \p Index
/// with `new` and return it as an ImmutablePass pointer. The returned object
/// is not freed here; presumably the caller takes ownership -- confirm at
/// call sites.
ImmutablePass *llvm::createImmutableModuleSummaryIndexWrapperPass(
const ModuleSummaryIndex *Index) {
return new ImmutableModuleSummaryIndexWrapperPass(Index);
}
1162 | |
// Legacy pass registration for ImmutableModuleSummaryIndexWrapperPass under
// the argument string "module-summary-info" with the description
// "Module summary info"; no dependencies are declared.
// NOTE(review): the trailing `false, true` are presumably the macro's
// "CFG only" and "is analysis" arguments -- confirm against PassSupport.h.
INITIALIZE_PASS(ImmutableModuleSummaryIndexWrapperPass, "module-summary-info" ,
"Module summary info" , false, true)
1165 | |
1166 | bool llvm::mayHaveMemprofSummary(const CallBase *CB) { |
1167 | if (!CB) |
1168 | return false; |
1169 | if (CB->isDebugOrPseudoInst()) |
1170 | return false; |
1171 | auto *CI = dyn_cast<CallInst>(Val: CB); |
1172 | auto *CalledValue = CB->getCalledOperand(); |
1173 | auto *CalledFunction = CB->getCalledFunction(); |
1174 | if (CalledValue && !CalledFunction) { |
1175 | CalledValue = CalledValue->stripPointerCasts(); |
1176 | // Stripping pointer casts can reveal a called function. |
1177 | CalledFunction = dyn_cast<Function>(Val: CalledValue); |
1178 | } |
1179 | // Check if this is an alias to a function. If so, get the |
1180 | // called aliasee for the checks below. |
1181 | if (auto *GA = dyn_cast<GlobalAlias>(Val: CalledValue)) { |
1182 | assert(!CalledFunction && |
1183 | "Expected null called function in callsite for alias" ); |
1184 | CalledFunction = dyn_cast<Function>(Val: GA->getAliaseeObject()); |
1185 | } |
1186 | // Check if this is a direct call to a known function or a known |
1187 | // intrinsic, or an indirect call with profile data. |
1188 | if (CalledFunction) { |
1189 | if (CI && CalledFunction->isIntrinsic()) |
1190 | return false; |
1191 | } else { |
1192 | // TODO: For now skip indirect calls. See comments in |
1193 | // computeFunctionSummary for what is needed to handle this. |
1194 | return false; |
1195 | } |
1196 | return true; |
1197 | } |
1198 | |