1 | //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements PGO instrumentation using a minimum spanning tree based |
10 | // on the following paper: |
11 | // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points |
12 | // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, |
13 | // Issue 3, pp 313-322 |
14 | // The idea of the algorithm based on the fact that for each node (except for |
15 | // the entry and exit), the sum of incoming edge counts equals the sum of |
16 | // outgoing edge counts. The count of edge on spanning tree can be derived from |
17 | // those edges not on the spanning tree. Knuth proves this method instruments |
18 | // the minimum number of edges. |
19 | // |
20 | // The minimal spanning tree here is actually a maximum weight tree -- on-tree |
21 | // edges have higher frequencies (more likely to execute). The idea is to |
22 | // instrument those less frequently executed edges to reduce the runtime |
23 | // overhead of instrumented binaries. |
24 | // |
25 | // This file contains two passes: |
26 | // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge |
27 | // count profile, and generates the instrumentation for indirect call |
28 | // profiling. |
29 | // (2) Pass PGOInstrumentationUse which reads the edge count profile and |
30 | // annotates the branch weights. It also reads the indirect call value |
31 | // profiling records and annotate the indirect call instructions. |
32 | // |
33 | // To get the precise counter information, These two passes need to invoke at |
34 | // the same compilation point (so they see the same IR). For pass |
35 | // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For |
36 | // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and |
37 | // the profile is opened in module level and passed to each PGOUseFunc instance. |
38 | // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put |
39 | // in class FuncPGOInstrumentation. |
40 | // |
41 | // Class PGOEdge represents a CFG edge and some auxiliary information. Class |
42 | // BBInfo contains auxiliary information for each BB. These two classes are used |
43 | // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived |
44 | // class of PGOEdge and BBInfo, respectively. They contains extra data structure |
45 | // used in populating profile counters. |
46 | // The MST implementation is in Class CFGMST (CFGMST.h). |
47 | // |
48 | //===----------------------------------------------------------------------===// |
49 | |
50 | #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" |
51 | #include "ValueProfileCollector.h" |
52 | #include "llvm/ADT/APInt.h" |
53 | #include "llvm/ADT/ArrayRef.h" |
54 | #include "llvm/ADT/STLExtras.h" |
55 | #include "llvm/ADT/SmallVector.h" |
56 | #include "llvm/ADT/Statistic.h" |
57 | #include "llvm/ADT/StringRef.h" |
58 | #include "llvm/ADT/Twine.h" |
59 | #include "llvm/ADT/iterator.h" |
60 | #include "llvm/ADT/iterator_range.h" |
61 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
62 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
63 | #include "llvm/Analysis/CFG.h" |
64 | #include "llvm/Analysis/LoopInfo.h" |
65 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
66 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
67 | #include "llvm/Analysis/TargetLibraryInfo.h" |
68 | #include "llvm/IR/Attributes.h" |
69 | #include "llvm/IR/BasicBlock.h" |
70 | #include "llvm/IR/CFG.h" |
71 | #include "llvm/IR/Comdat.h" |
72 | #include "llvm/IR/Constant.h" |
73 | #include "llvm/IR/Constants.h" |
74 | #include "llvm/IR/DiagnosticInfo.h" |
75 | #include "llvm/IR/Dominators.h" |
76 | #include "llvm/IR/EHPersonalities.h" |
77 | #include "llvm/IR/Function.h" |
78 | #include "llvm/IR/GlobalAlias.h" |
79 | #include "llvm/IR/GlobalValue.h" |
80 | #include "llvm/IR/GlobalVariable.h" |
81 | #include "llvm/IR/IRBuilder.h" |
82 | #include "llvm/IR/InstVisitor.h" |
83 | #include "llvm/IR/InstrTypes.h" |
84 | #include "llvm/IR/Instruction.h" |
85 | #include "llvm/IR/Instructions.h" |
86 | #include "llvm/IR/IntrinsicInst.h" |
87 | #include "llvm/IR/Intrinsics.h" |
88 | #include "llvm/IR/LLVMContext.h" |
89 | #include "llvm/IR/MDBuilder.h" |
90 | #include "llvm/IR/Module.h" |
91 | #include "llvm/IR/PassManager.h" |
92 | #include "llvm/IR/ProfDataUtils.h" |
93 | #include "llvm/IR/ProfileSummary.h" |
94 | #include "llvm/IR/Type.h" |
95 | #include "llvm/IR/Value.h" |
96 | #include "llvm/ProfileData/InstrProf.h" |
97 | #include "llvm/ProfileData/InstrProfReader.h" |
98 | #include "llvm/Support/BranchProbability.h" |
99 | #include "llvm/Support/CRC.h" |
100 | #include "llvm/Support/Casting.h" |
101 | #include "llvm/Support/CommandLine.h" |
102 | #include "llvm/Support/DOTGraphTraits.h" |
103 | #include "llvm/Support/Debug.h" |
104 | #include "llvm/Support/Error.h" |
105 | #include "llvm/Support/ErrorHandling.h" |
106 | #include "llvm/Support/GraphWriter.h" |
107 | #include "llvm/Support/VirtualFileSystem.h" |
108 | #include "llvm/Support/raw_ostream.h" |
109 | #include "llvm/TargetParser/Triple.h" |
110 | #include "llvm/Transforms/Instrumentation.h" |
111 | #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h" |
112 | #include "llvm/Transforms/Instrumentation/CFGMST.h" |
113 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
114 | #include "llvm/Transforms/Utils/MisExpect.h" |
115 | #include "llvm/Transforms/Utils/ModuleUtils.h" |
116 | #include <algorithm> |
117 | #include <cassert> |
118 | #include <cstdint> |
119 | #include <memory> |
120 | #include <numeric> |
121 | #include <optional> |
122 | #include <string> |
123 | #include <unordered_map> |
124 | #include <utility> |
125 | #include <vector> |
126 | |
127 | using namespace llvm; |
128 | using ProfileCount = Function::ProfileCount; |
129 | using VPCandidateInfo = ValueProfileCollector::CandidateInfo; |
130 | |
131 | #define DEBUG_TYPE "pgo-instrumentation" |
132 | |
133 | STATISTIC(NumOfPGOInstrument, "Number of edges instrumented." ); |
134 | STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented." ); |
135 | STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented." ); |
136 | STATISTIC(NumOfPGOEdge, "Number of edges." ); |
137 | STATISTIC(NumOfPGOBB, "Number of basic-blocks." ); |
138 | STATISTIC(NumOfPGOSplit, "Number of critical edge splits." ); |
139 | STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts." ); |
140 | STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile." ); |
141 | STATISTIC(NumOfPGOMissing, "Number of functions without profile." ); |
142 | STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations." ); |
143 | STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO." ); |
144 | STATISTIC(NumOfCSPGOSelectInsts, |
145 | "Number of select instruction instrumented in CSPGO." ); |
146 | STATISTIC(NumOfCSPGOMemIntrinsics, |
147 | "Number of mem intrinsics instrumented in CSPGO." ); |
148 | STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO." ); |
149 | STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO." ); |
150 | STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO." ); |
151 | STATISTIC(NumOfCSPGOFunc, |
152 | "Number of functions having valid profile counts in CSPGO." ); |
153 | STATISTIC(NumOfCSPGOMismatch, |
154 | "Number of functions having mismatch profile in CSPGO." ); |
155 | STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO." ); |
156 | STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed" ); |
157 | |
158 | // Command line option to specify the file to read profile from. This is |
159 | // mainly used for testing. |
160 | static cl::opt<std::string> |
161 | PGOTestProfileFile("pgo-test-profile-file" , cl::init(Val: "" ), cl::Hidden, |
162 | cl::value_desc("filename" ), |
163 | cl::desc("Specify the path of profile data file. This is" |
164 | "mainly for test purpose." )); |
165 | static cl::opt<std::string> PGOTestProfileRemappingFile( |
166 | "pgo-test-profile-remapping-file" , cl::init(Val: "" ), cl::Hidden, |
167 | cl::value_desc("filename" ), |
168 | cl::desc("Specify the path of profile remapping file. This is mainly for " |
169 | "test purpose." )); |
170 | |
171 | // Command line option to disable value profiling. The default is false: |
172 | // i.e. value profiling is enabled by default. This is for debug purpose. |
173 | static cl::opt<bool> DisableValueProfiling("disable-vp" , cl::init(Val: false), |
174 | cl::Hidden, |
175 | cl::desc("Disable Value Profiling" )); |
176 | |
177 | // Command line option to set the maximum number of VP annotations to write to |
178 | // the metadata for a single indirect call callsite. |
179 | static cl::opt<unsigned> MaxNumAnnotations( |
180 | "icp-max-annotations" , cl::init(Val: 3), cl::Hidden, |
181 | cl::desc("Max number of annotations for a single indirect " |
182 | "call callsite" )); |
183 | |
184 | // Command line option to set the maximum number of value annotations |
185 | // to write to the metadata for a single memop intrinsic. |
186 | static cl::opt<unsigned> MaxNumMemOPAnnotations( |
187 | "memop-max-annotations" , cl::init(Val: 4), cl::Hidden, |
188 | cl::desc("Max number of preicise value annotations for a single memop" |
189 | "intrinsic" )); |
190 | |
191 | // Command line option to control appending FunctionHash to the name of a COMDAT |
192 | // function. This is to avoid the hash mismatch caused by the preinliner. |
193 | static cl::opt<bool> DoComdatRenaming( |
194 | "do-comdat-renaming" , cl::init(Val: false), cl::Hidden, |
195 | cl::desc("Append function hash to the name of COMDAT function to avoid " |
196 | "function hash mismatch due to the preinliner" )); |
197 | |
198 | namespace llvm { |
199 | // Command line option to enable/disable the warning about missing profile |
200 | // information. |
201 | cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function" , cl::init(Val: false), |
202 | cl::Hidden, |
203 | cl::desc("Use this option to turn on/off " |
204 | "warnings about missing profile data for " |
205 | "functions." )); |
206 | |
207 | // Command line option to enable/disable the warning about a hash mismatch in |
208 | // the profile data. |
209 | cl::opt<bool> |
210 | NoPGOWarnMismatch("no-pgo-warn-mismatch" , cl::init(Val: false), cl::Hidden, |
211 | cl::desc("Use this option to turn off/on " |
212 | "warnings about profile cfg mismatch." )); |
213 | |
214 | // Command line option to enable/disable the warning about a hash mismatch in |
215 | // the profile data for Comdat functions, which often turns out to be false |
216 | // positive due to the pre-instrumentation inline. |
217 | cl::opt<bool> NoPGOWarnMismatchComdatWeak( |
218 | "no-pgo-warn-mismatch-comdat-weak" , cl::init(Val: true), cl::Hidden, |
219 | cl::desc("The option is used to turn on/off " |
220 | "warnings about hash mismatch for comdat " |
221 | "or weak functions." )); |
222 | } // namespace llvm |
223 | |
224 | // Command line option to enable/disable select instruction instrumentation. |
225 | static cl::opt<bool> |
226 | PGOInstrSelect("pgo-instr-select" , cl::init(Val: true), cl::Hidden, |
227 | cl::desc("Use this option to turn on/off SELECT " |
228 | "instruction instrumentation. " )); |
229 | |
230 | // Command line option to turn on CFG dot or text dump of raw profile counts |
231 | static cl::opt<PGOViewCountsType> PGOViewRawCounts( |
232 | "pgo-view-raw-counts" , cl::Hidden, |
233 | cl::desc("A boolean option to show CFG dag or text " |
234 | "with raw profile counts from " |
235 | "profile data. See also option " |
236 | "-pgo-view-counts. To limit graph " |
237 | "display to only one function, use " |
238 | "filtering option -view-bfi-func-name." ), |
239 | cl::values(clEnumValN(PGOVCT_None, "none" , "do not show." ), |
240 | clEnumValN(PGOVCT_Graph, "graph" , "show a graph." ), |
241 | clEnumValN(PGOVCT_Text, "text" , "show in text." ))); |
242 | |
243 | // Command line option to enable/disable memop intrinsic call.size profiling. |
244 | static cl::opt<bool> |
245 | PGOInstrMemOP("pgo-instr-memop" , cl::init(Val: true), cl::Hidden, |
246 | cl::desc("Use this option to turn on/off " |
247 | "memory intrinsic size profiling." )); |
248 | |
249 | // Emit branch probability as optimization remarks. |
250 | static cl::opt<bool> |
251 | EmitBranchProbability("pgo-emit-branch-prob" , cl::init(Val: false), cl::Hidden, |
252 | cl::desc("When this option is on, the annotated " |
253 | "branch probability will be emitted as " |
254 | "optimization remarks: -{Rpass|" |
255 | "pass-remarks}=pgo-instrumentation" )); |
256 | |
257 | static cl::opt<bool> PGOInstrumentEntry( |
258 | "pgo-instrument-entry" , cl::init(Val: false), cl::Hidden, |
259 | cl::desc("Force to instrument function entry basicblock." )); |
260 | |
261 | static cl::opt<bool> PGOFunctionEntryCoverage( |
262 | "pgo-function-entry-coverage" , cl::Hidden, |
263 | cl::desc( |
264 | "Use this option to enable function entry coverage instrumentation." )); |
265 | |
266 | static cl::opt<bool> PGOBlockCoverage( |
267 | "pgo-block-coverage" , |
268 | cl::desc("Use this option to enable basic block coverage instrumentation" )); |
269 | |
270 | static cl::opt<bool> |
271 | PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph" , |
272 | cl::desc("Create a dot file of CFGs with block " |
273 | "coverage inference information" )); |
274 | |
275 | static cl::opt<bool> PGOTemporalInstrumentation( |
276 | "pgo-temporal-instrumentation" , |
277 | cl::desc("Use this option to enable temporal instrumentation" )); |
278 | |
279 | static cl::opt<bool> |
280 | PGOFixEntryCount("pgo-fix-entry-count" , cl::init(Val: true), cl::Hidden, |
281 | cl::desc("Fix function entry count in profile use." )); |
282 | |
283 | static cl::opt<bool> PGOVerifyHotBFI( |
284 | "pgo-verify-hot-bfi" , cl::init(Val: false), cl::Hidden, |
285 | cl::desc("Print out the non-match BFI count if a hot raw profile count " |
286 | "becomes non-hot, or a cold raw profile count becomes hot. " |
287 | "The print is enabled under -Rpass-analysis=pgo, or " |
288 | "internal option -pass-remakrs-analysis=pgo." )); |
289 | |
290 | static cl::opt<bool> PGOVerifyBFI( |
291 | "pgo-verify-bfi" , cl::init(Val: false), cl::Hidden, |
292 | cl::desc("Print out mismatched BFI counts after setting profile metadata " |
293 | "The print is enabled under -Rpass-analysis=pgo, or " |
294 | "internal option -pass-remakrs-analysis=pgo." )); |
295 | |
296 | static cl::opt<unsigned> PGOVerifyBFIRatio( |
297 | "pgo-verify-bfi-ratio" , cl::init(Val: 2), cl::Hidden, |
298 | cl::desc("Set the threshold for pgo-verify-bfi: only print out " |
299 | "mismatched BFI if the difference percentage is greater than " |
300 | "this value (in percentage)." )); |
301 | |
302 | static cl::opt<unsigned> PGOVerifyBFICutoff( |
303 | "pgo-verify-bfi-cutoff" , cl::init(Val: 5), cl::Hidden, |
304 | cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " |
305 | "profile count value is below." )); |
306 | |
307 | static cl::opt<std::string> PGOTraceFuncHash( |
308 | "pgo-trace-func-hash" , cl::init(Val: "-" ), cl::Hidden, |
309 | cl::value_desc("function name" ), |
310 | cl::desc("Trace the hash of the function with this name." )); |
311 | |
312 | static cl::opt<unsigned> PGOFunctionSizeThreshold( |
313 | "pgo-function-size-threshold" , cl::Hidden, |
314 | cl::desc("Do not instrument functions smaller than this threshold." )); |
315 | |
316 | static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold( |
317 | "pgo-critical-edge-threshold" , cl::init(Val: 20000), cl::Hidden, |
318 | cl::desc("Do not instrument functions with the number of critical edges " |
319 | " greater than this threshold." )); |
320 | |
321 | namespace llvm { |
322 | // Command line option to turn on CFG dot dump after profile annotation. |
323 | // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts |
324 | extern cl::opt<PGOViewCountsType> PGOViewCounts; |
325 | |
326 | // Command line option to specify the name of the function for CFG dump |
327 | // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= |
328 | extern cl::opt<std::string> ViewBlockFreqFuncName; |
329 | |
330 | // Command line option to enable vtable value profiling. Defined in |
331 | // ProfileData/InstrProf.cpp: -enable-vtable-value-profiling= |
332 | extern cl::opt<bool> EnableVTableValueProfiling; |
333 | extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate; |
334 | } // namespace llvm |
335 | |
336 | // Return a string describing the branch condition that can be |
337 | // used in static branch probability heuristics: |
338 | static std::string getBranchCondString(Instruction *TI) { |
339 | BranchInst *BI = dyn_cast<BranchInst>(Val: TI); |
340 | if (!BI || !BI->isConditional()) |
341 | return std::string(); |
342 | |
343 | Value *Cond = BI->getCondition(); |
344 | ICmpInst *CI = dyn_cast<ICmpInst>(Val: Cond); |
345 | if (!CI) |
346 | return std::string(); |
347 | |
348 | std::string result; |
349 | raw_string_ostream OS(result); |
350 | OS << CI->getPredicate() << "_" ; |
351 | CI->getOperand(i_nocapture: 0)->getType()->print(O&: OS, IsForDebug: true); |
352 | |
353 | Value *RHS = CI->getOperand(i_nocapture: 1); |
354 | ConstantInt *CV = dyn_cast<ConstantInt>(Val: RHS); |
355 | if (CV) { |
356 | if (CV->isZero()) |
357 | OS << "_Zero" ; |
358 | else if (CV->isOne()) |
359 | OS << "_One" ; |
360 | else if (CV->isMinusOne()) |
361 | OS << "_MinusOne" ; |
362 | else |
363 | OS << "_Const" ; |
364 | } |
365 | OS.flush(); |
366 | return result; |
367 | } |
368 | |
369 | static const char *ValueProfKindDescr[] = { |
370 | #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, |
371 | #include "llvm/ProfileData/InstrProfData.inc" |
372 | }; |
373 | |
374 | // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime |
375 | // aware this is an ir_level profile so it can set the version flag. |
376 | static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { |
377 | const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); |
378 | Type *IntTy64 = Type::getInt64Ty(C&: M.getContext()); |
379 | uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); |
380 | if (IsCS) |
381 | ProfileVersion |= VARIANT_MASK_CSIR_PROF; |
382 | if (PGOInstrumentEntry) |
383 | ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; |
384 | if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) |
385 | ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; |
386 | if (PGOFunctionEntryCoverage) |
387 | ProfileVersion |= |
388 | VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; |
389 | if (PGOBlockCoverage) |
390 | ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE; |
391 | if (PGOTemporalInstrumentation) |
392 | ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF; |
393 | auto IRLevelVersionVariable = new GlobalVariable( |
394 | M, IntTy64, true, GlobalValue::WeakAnyLinkage, |
395 | Constant::getIntegerValue(Ty: IntTy64, V: APInt(64, ProfileVersion)), VarName); |
396 | IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility); |
397 | Triple TT(M.getTargetTriple()); |
398 | if (TT.supportsCOMDAT()) { |
399 | IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); |
400 | IRLevelVersionVariable->setComdat(M.getOrInsertComdat(Name: VarName)); |
401 | } |
402 | return IRLevelVersionVariable; |
403 | } |
404 | |
405 | namespace { |
406 | |
407 | /// The select instruction visitor plays three roles specified |
408 | /// by the mode. In \c VM_counting mode, it simply counts the number of |
409 | /// select instructions. In \c VM_instrument mode, it inserts code to count |
410 | /// the number times TrueValue of select is taken. In \c VM_annotate mode, |
411 | /// it reads the profile data and annotate the select instruction with metadata. |
412 | enum VisitMode { VM_counting, VM_instrument, VM_annotate }; |
413 | class PGOUseFunc; |
414 | |
415 | /// Instruction Visitor class to visit select instructions. |
416 | struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { |
417 | Function &F; |
418 | unsigned NSIs = 0; // Number of select instructions instrumented. |
419 | VisitMode Mode = VM_counting; // Visiting mode. |
420 | unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. |
421 | unsigned TotalNumCtrs = 0; // Total number of counters |
422 | GlobalVariable *FuncNameVar = nullptr; |
423 | uint64_t FuncHash = 0; |
424 | PGOUseFunc *UseFunc = nullptr; |
425 | bool HasSingleByteCoverage; |
426 | |
427 | SelectInstVisitor(Function &Func, bool HasSingleByteCoverage) |
428 | : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {} |
429 | |
430 | void countSelects() { |
431 | NSIs = 0; |
432 | Mode = VM_counting; |
433 | visit(F); |
434 | } |
435 | |
436 | // Visit the IR stream and instrument all select instructions. \p |
437 | // Ind is a pointer to the counter index variable; \p TotalNC |
438 | // is the total number of counters; \p FNV is the pointer to the |
439 | // PGO function name var; \p FHash is the function hash. |
440 | void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV, |
441 | uint64_t FHash) { |
442 | Mode = VM_instrument; |
443 | CurCtrIdx = Ind; |
444 | TotalNumCtrs = TotalNC; |
445 | FuncHash = FHash; |
446 | FuncNameVar = FNV; |
447 | visit(F); |
448 | } |
449 | |
450 | // Visit the IR stream and annotate all select instructions. |
451 | void annotateSelects(PGOUseFunc *UF, unsigned *Ind) { |
452 | Mode = VM_annotate; |
453 | UseFunc = UF; |
454 | CurCtrIdx = Ind; |
455 | visit(F); |
456 | } |
457 | |
458 | void instrumentOneSelectInst(SelectInst &SI); |
459 | void annotateOneSelectInst(SelectInst &SI); |
460 | |
461 | // Visit \p SI instruction and perform tasks according to visit mode. |
462 | void visitSelectInst(SelectInst &SI); |
463 | |
464 | // Return the number of select instructions. This needs be called after |
465 | // countSelects(). |
466 | unsigned getNumOfSelectInsts() const { return NSIs; } |
467 | }; |
468 | |
469 | /// This class implements the CFG edges for the Minimum Spanning Tree (MST) |
470 | /// based instrumentation. |
471 | /// Note that the CFG can be a multi-graph. So there might be multiple edges |
472 | /// with the same SrcBB and DestBB. |
473 | struct PGOEdge { |
474 | BasicBlock *SrcBB; |
475 | BasicBlock *DestBB; |
476 | uint64_t Weight; |
477 | bool InMST = false; |
478 | bool Removed = false; |
479 | bool IsCritical = false; |
480 | |
481 | PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1) |
482 | : SrcBB(Src), DestBB(Dest), Weight(W) {} |
483 | |
484 | /// Return the information string of an edge. |
485 | std::string infoString() const { |
486 | return (Twine(Removed ? "-" : " " ) + (InMST ? " " : "*" ) + |
487 | (IsCritical ? "c" : " " ) + " W=" + Twine(Weight)) |
488 | .str(); |
489 | } |
490 | }; |
491 | |
492 | /// This class stores the auxiliary information for each BB in the MST. |
493 | struct PGOBBInfo { |
494 | PGOBBInfo *Group; |
495 | uint32_t Index; |
496 | uint32_t Rank = 0; |
497 | |
498 | PGOBBInfo(unsigned IX) : Group(this), Index(IX) {} |
499 | |
500 | /// Return the information string of this object. |
501 | std::string infoString() const { |
502 | return (Twine("Index=" ) + Twine(Index)).str(); |
503 | } |
504 | }; |
505 | |
506 | // This class implements the CFG edges. Note the CFG can be a multi-graph. |
507 | template <class Edge, class BBInfo> class FuncPGOInstrumentation { |
508 | private: |
509 | Function &F; |
510 | |
511 | // Is this is context-sensitive instrumentation. |
512 | bool IsCS; |
513 | |
514 | // A map that stores the Comdat group in function F. |
515 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; |
516 | |
517 | ValueProfileCollector VPC; |
518 | |
519 | void computeCFGHash(); |
520 | void renameComdatFunction(); |
521 | |
522 | public: |
523 | const TargetLibraryInfo &TLI; |
524 | std::vector<std::vector<VPCandidateInfo>> ValueSites; |
525 | SelectInstVisitor SIVisitor; |
526 | std::string FuncName; |
527 | std::string DeprecatedFuncName; |
528 | GlobalVariable *FuncNameVar; |
529 | |
530 | // CFG hash value for this function. |
531 | uint64_t FunctionHash = 0; |
532 | |
533 | // The Minimum Spanning Tree of function CFG. |
534 | CFGMST<Edge, BBInfo> MST; |
535 | |
536 | const std::optional<BlockCoverageInference> BCI; |
537 | |
538 | static std::optional<BlockCoverageInference> |
539 | constructBCI(Function &Func, bool HasSingleByteCoverage, |
540 | bool InstrumentFuncEntry) { |
541 | if (HasSingleByteCoverage) |
542 | return BlockCoverageInference(Func, InstrumentFuncEntry); |
543 | return {}; |
544 | } |
545 | |
546 | // Collect all the BBs that will be instrumented, and store them in |
547 | // InstrumentBBs. |
548 | void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); |
549 | |
550 | // Give an edge, find the BB that will be instrumented. |
551 | // Return nullptr if there is no BB to be instrumented. |
552 | BasicBlock *getInstrBB(Edge *E); |
553 | |
554 | // Return the auxiliary BB information. |
555 | BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } |
556 | |
557 | // Return the auxiliary BB information if available. |
558 | BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } |
559 | |
560 | // Dump edges and BB information. |
561 | void dumpInfo(StringRef Str = "" ) const { |
562 | MST.dumpEdges(dbgs(), Twine("Dump Function " ) + FuncName + |
563 | " Hash: " + Twine(FunctionHash) + "\t" + Str); |
564 | } |
565 | |
566 | FuncPGOInstrumentation( |
567 | Function &Func, TargetLibraryInfo &TLI, |
568 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, |
569 | bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, |
570 | BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, |
571 | bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false) |
572 | : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), |
573 | TLI(TLI), ValueSites(IPVK_Last + 1), |
574 | SIVisitor(Func, HasSingleByteCoverage), |
575 | MST(F, InstrumentFuncEntry, BPI, BFI), |
576 | BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) { |
577 | if (BCI && PGOViewBlockCoverageGraph) |
578 | BCI->viewBlockCoverageGraph(); |
579 | // This should be done before CFG hash computation. |
580 | SIVisitor.countSelects(); |
581 | ValueSites[IPVK_MemOPSize] = VPC.get(Kind: IPVK_MemOPSize); |
582 | if (!IsCS) { |
583 | NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); |
584 | NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); |
585 | NumOfPGOBB += MST.bbInfoSize(); |
586 | ValueSites[IPVK_IndirectCallTarget] = VPC.get(Kind: IPVK_IndirectCallTarget); |
587 | if (EnableVTableValueProfiling) |
588 | ValueSites[IPVK_VTableTarget] = VPC.get(Kind: IPVK_VTableTarget); |
589 | } else { |
590 | NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); |
591 | NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); |
592 | NumOfCSPGOBB += MST.bbInfoSize(); |
593 | } |
594 | |
595 | FuncName = getIRPGOFuncName(F); |
596 | DeprecatedFuncName = getPGOFuncName(F); |
597 | computeCFGHash(); |
598 | if (!ComdatMembers.empty()) |
599 | renameComdatFunction(); |
600 | LLVM_DEBUG(dumpInfo("after CFGMST" )); |
601 | |
602 | for (const auto &E : MST.allEdges()) { |
603 | if (E->Removed) |
604 | continue; |
605 | IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; |
606 | if (!E->InMST) |
607 | IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; |
608 | } |
609 | |
610 | if (CreateGlobalVar) |
611 | FuncNameVar = createPGOFuncNameVar(F, PGOFuncName: FuncName); |
612 | } |
613 | }; |
614 | |
615 | } // end anonymous namespace |
616 | |
617 | // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index |
618 | // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers |
619 | // of selects, indirect calls, mem ops and edges. |
620 | template <class Edge, class BBInfo> |
621 | void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { |
622 | std::vector<uint8_t> Indexes; |
623 | JamCRC JC; |
624 | for (auto &BB : F) { |
625 | for (BasicBlock *Succ : successors(BB: &BB)) { |
626 | auto BI = findBBInfo(BB: Succ); |
627 | if (BI == nullptr) |
628 | continue; |
629 | uint32_t Index = BI->Index; |
630 | for (int J = 0; J < 4; J++) |
631 | Indexes.push_back(x: (uint8_t)(Index >> (J * 8))); |
632 | } |
633 | } |
634 | JC.update(Data: Indexes); |
635 | |
636 | JamCRC JCH; |
637 | // The higher 32 bits. |
638 | auto updateJCH = [&JCH](uint64_t Num) { |
639 | uint8_t Data[8]; |
640 | support::endian::write64le(P: Data, V: Num); |
641 | JCH.update(Data); |
642 | }; |
643 | updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); |
644 | updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); |
645 | updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); |
646 | if (BCI) { |
647 | updateJCH(BCI->getInstrumentedBlocksHash()); |
648 | } else { |
649 | updateJCH((uint64_t)MST.numEdges()); |
650 | } |
651 | |
652 | // Hash format for context sensitive profile. Reserve 4 bits for other |
653 | // information. |
654 | FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); |
655 | |
656 | // Reserve bit 60-63 for other information purpose. |
657 | FunctionHash &= 0x0FFFFFFFFFFFFFFF; |
658 | if (IsCS) |
659 | NamedInstrProfRecord::setCSFlagInHash(FunctionHash); |
660 | LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" |
661 | << " CRC = " << JC.getCRC() |
662 | << ", Selects = " << SIVisitor.getNumOfSelectInsts() |
663 | << ", Edges = " << MST.numEdges() << ", ICSites = " |
664 | << ValueSites[IPVK_IndirectCallTarget].size() |
665 | << ", Memops = " << ValueSites[IPVK_MemOPSize].size() |
666 | << ", High32 CRC = " << JCH.getCRC() |
667 | << ", Hash = " << FunctionHash << "\n" ;); |
668 | |
669 | if (PGOTraceFuncHash != "-" && F.getName().contains(Other: PGOTraceFuncHash)) |
670 | dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash |
671 | << " in building " << F.getParent()->getSourceFileName() << "\n" ; |
672 | } |
673 | |
674 | // Check if we can safely rename this Comdat function. |
675 | static bool canRenameComdat( |
676 | Function &F, |
677 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { |
678 | if (!DoComdatRenaming || !canRenameComdatFunc(F, CheckAddressTaken: true)) |
679 | return false; |
680 | |
681 | // FIXME: Current only handle those Comdat groups that only containing one |
682 | // function. |
683 | // (1) For a Comdat group containing multiple functions, we need to have a |
684 | // unique postfix based on the hashes for each function. There is a |
685 | // non-trivial code refactoring to do this efficiently. |
686 | // (2) Variables can not be renamed, so we can not rename Comdat function in a |
687 | // group including global vars. |
688 | Comdat *C = F.getComdat(); |
689 | for (auto &&CM : make_range(p: ComdatMembers.equal_range(x: C))) { |
690 | assert(!isa<GlobalAlias>(CM.second)); |
691 | Function *FM = dyn_cast<Function>(Val: CM.second); |
692 | if (FM != &F) |
693 | return false; |
694 | } |
695 | return true; |
696 | } |
697 | |
698 | // Append the CFGHash to the Comdat function name. |
699 | template <class Edge, class BBInfo> |
700 | void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { |
701 | if (!canRenameComdat(F, ComdatMembers)) |
702 | return; |
703 | std::string OrigName = F.getName().str(); |
704 | std::string NewFuncName = |
705 | Twine(F.getName() + "." + Twine(FunctionHash)).str(); |
706 | F.setName(Twine(NewFuncName)); |
707 | GlobalAlias::create(Linkage: GlobalValue::WeakAnyLinkage, Name: OrigName, Aliasee: &F); |
708 | FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); |
709 | Comdat *NewComdat; |
710 | Module *M = F.getParent(); |
711 | // For AvailableExternallyLinkage functions, change the linkage to |
712 | // LinkOnceODR and put them into comdat. This is because after renaming, there |
713 | // is no backup external copy available for the function. |
714 | if (!F.hasComdat()) { |
715 | assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); |
716 | NewComdat = M->getOrInsertComdat(Name: StringRef(NewFuncName)); |
717 | F.setLinkage(GlobalValue::LinkOnceODRLinkage); |
718 | F.setComdat(NewComdat); |
719 | return; |
720 | } |
721 | |
722 | // This function belongs to a single function Comdat group. |
723 | Comdat *OrigComdat = F.getComdat(); |
724 | std::string NewComdatName = |
725 | Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); |
726 | NewComdat = M->getOrInsertComdat(Name: StringRef(NewComdatName)); |
727 | NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); |
728 | |
729 | for (auto &&CM : make_range(p: ComdatMembers.equal_range(x: OrigComdat))) { |
730 | // Must be a function. |
731 | cast<Function>(Val: CM.second)->setComdat(NewComdat); |
732 | } |
733 | } |
734 | |
735 | /// Collect all the BBs that will be instruments and add them to |
736 | /// `InstrumentBBs`. |
737 | template <class Edge, class BBInfo> |
738 | void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( |
739 | std::vector<BasicBlock *> &InstrumentBBs) { |
740 | if (BCI) { |
741 | for (auto &BB : F) |
742 | if (BCI->shouldInstrumentBlock(BB)) |
743 | InstrumentBBs.push_back(x: &BB); |
744 | return; |
745 | } |
746 | |
747 | // Use a worklist as we will update the vector during the iteration. |
748 | std::vector<Edge *> EdgeList; |
749 | EdgeList.reserve(MST.numEdges()); |
750 | for (const auto &E : MST.allEdges()) |
751 | EdgeList.push_back(E.get()); |
752 | |
753 | for (auto &E : EdgeList) { |
754 | BasicBlock *InstrBB = getInstrBB(E); |
755 | if (InstrBB) |
756 | InstrumentBBs.push_back(x: InstrBB); |
757 | } |
758 | } |
759 | |
760 | // Given a CFG E to be instrumented, find which BB to place the instrumented |
761 | // code. The function will split the critical edge if necessary. |
762 | template <class Edge, class BBInfo> |
763 | BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { |
764 | if (E->InMST || E->Removed) |
765 | return nullptr; |
766 | |
767 | BasicBlock *SrcBB = E->SrcBB; |
768 | BasicBlock *DestBB = E->DestBB; |
769 | // For a fake edge, instrument the real BB. |
770 | if (SrcBB == nullptr) |
771 | return DestBB; |
772 | if (DestBB == nullptr) |
773 | return SrcBB; |
774 | |
775 | auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { |
776 | // There are basic blocks (such as catchswitch) cannot be instrumented. |
777 | // If the returned first insertion point is the end of BB, skip this BB. |
778 | if (BB->getFirstInsertionPt() == BB->end()) |
779 | return nullptr; |
780 | return BB; |
781 | }; |
782 | |
783 | // Instrument the SrcBB if it has a single successor, |
784 | // otherwise, the DestBB if this is not a critical edge. |
785 | Instruction *TI = SrcBB->getTerminator(); |
786 | if (TI->getNumSuccessors() <= 1) |
787 | return canInstrument(SrcBB); |
788 | if (!E->IsCritical) |
789 | return canInstrument(DestBB); |
790 | |
791 | // Some IndirectBr critical edges cannot be split by the previous |
792 | // SplitIndirectBrCriticalEdges call. Bail out. |
793 | unsigned SuccNum = GetSuccessorNumber(BB: SrcBB, Succ: DestBB); |
794 | BasicBlock *InstrBB = |
795 | isa<IndirectBrInst>(Val: TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); |
796 | if (!InstrBB) { |
797 | LLVM_DEBUG( |
798 | dbgs() << "Fail to split critical edge: not instrument this edge.\n" ); |
799 | return nullptr; |
800 | } |
801 | // For a critical edge, we have to split. Instrument the newly |
802 | // created BB. |
803 | IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; |
804 | LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index |
805 | << " --> " << getBBInfo(DestBB).Index << "\n" ); |
806 | // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. |
807 | MST.addEdge(SrcBB, InstrBB, 0); |
808 | // Second one: Add new edge of InstrBB->DestBB. |
809 | Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); |
810 | NewEdge1.InMST = true; |
811 | E->Removed = true; |
812 | |
813 | return canInstrument(InstrBB); |
814 | } |
815 | |
816 | // When generating value profiling calls on Windows routines that make use of |
817 | // handler funclets for exception processing an operand bundle needs to attached |
818 | // to the called function. This routine will set \p OpBundles to contain the |
819 | // funclet information, if any is needed, that should be placed on the generated |
820 | // value profiling call for the value profile candidate call. |
821 | static void |
822 | populateEHOperandBundle(VPCandidateInfo &Cand, |
823 | DenseMap<BasicBlock *, ColorVector> &BlockColors, |
824 | SmallVectorImpl<OperandBundleDef> &OpBundles) { |
825 | auto *OrigCall = dyn_cast<CallBase>(Val: Cand.AnnotatedInst); |
826 | if (!OrigCall) |
827 | return; |
828 | |
829 | if (!isa<IntrinsicInst>(Val: OrigCall)) { |
830 | // The instrumentation call should belong to the same funclet as a |
831 | // non-intrinsic call, so just copy the operand bundle, if any exists. |
832 | std::optional<OperandBundleUse> ParentFunclet = |
833 | OrigCall->getOperandBundle(ID: LLVMContext::OB_funclet); |
834 | if (ParentFunclet) |
835 | OpBundles.emplace_back(Args: OperandBundleDef(*ParentFunclet)); |
836 | } else { |
837 | // Intrinsics or other instructions do not get funclet information from the |
838 | // front-end. Need to use the BlockColors that was computed by the routine |
839 | // colorEHFunclets to determine whether a funclet is needed. |
840 | if (!BlockColors.empty()) { |
841 | const ColorVector &CV = BlockColors.find(Val: OrigCall->getParent())->second; |
842 | assert(CV.size() == 1 && "non-unique color for block!" ); |
843 | Instruction *EHPad = CV.front()->getFirstNonPHI(); |
844 | if (EHPad->isEHPad()) |
845 | OpBundles.emplace_back(Args: "funclet" , Args&: EHPad); |
846 | } |
847 | } |
848 | } |
849 | |
850 | // Visit all edge and instrument the edges not in MST, and do value profiling. |
851 | // Critical edges will be split. |
852 | static void instrumentOneFunc( |
853 | Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, |
854 | BlockFrequencyInfo *BFI, |
855 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, |
856 | bool IsCS) { |
857 | if (!PGOBlockCoverage) { |
858 | // Split indirectbr critical edges here before computing the MST rather than |
859 | // later in getInstrBB() to avoid invalidating it. |
860 | SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); |
861 | } |
862 | |
863 | FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo( |
864 | F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry, |
865 | PGOBlockCoverage); |
866 | |
867 | auto Name = FuncInfo.FuncNameVar; |
868 | auto CFGHash = ConstantInt::get(Ty: Type::getInt64Ty(C&: M->getContext()), |
869 | V: FuncInfo.FunctionHash); |
870 | if (PGOFunctionEntryCoverage) { |
871 | auto &EntryBB = F.getEntryBlock(); |
872 | IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); |
873 | // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, |
874 | // i32 <index>) |
875 | Builder.CreateCall( |
876 | Intrinsic::getDeclaration(M, Intrinsic::id: instrprof_cover), |
877 | {Name, CFGHash, Builder.getInt32(C: 1), Builder.getInt32(C: 0)}); |
878 | return; |
879 | } |
880 | |
881 | std::vector<BasicBlock *> InstrumentBBs; |
882 | FuncInfo.getInstrumentBBs(InstrumentBBs); |
883 | unsigned NumCounters = |
884 | InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); |
885 | |
886 | uint32_t I = 0; |
887 | if (PGOTemporalInstrumentation) { |
888 | NumCounters += PGOBlockCoverage ? 8 : 1; |
889 | auto &EntryBB = F.getEntryBlock(); |
890 | IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); |
891 | // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>, |
892 | // i32 <index>) |
893 | Builder.CreateCall( |
894 | Intrinsic::getDeclaration(M, Intrinsic::id: instrprof_timestamp), |
895 | {Name, CFGHash, Builder.getInt32(C: NumCounters), Builder.getInt32(C: I)}); |
896 | I += PGOBlockCoverage ? 8 : 1; |
897 | } |
898 | |
899 | for (auto *InstrBB : InstrumentBBs) { |
900 | IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); |
901 | assert(Builder.GetInsertPoint() != InstrBB->end() && |
902 | "Cannot get the Instrumentation point" ); |
903 | // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, |
904 | // i32 <index>) |
905 | Builder.CreateCall( |
906 | Intrinsic::getDeclaration(M, id: PGOBlockCoverage |
907 | ? Intrinsic::instrprof_cover |
908 | : Intrinsic::instrprof_increment), |
909 | {Name, CFGHash, Builder.getInt32(C: NumCounters), Builder.getInt32(C: I++)}); |
910 | } |
911 | |
912 | // Now instrument select instructions: |
913 | FuncInfo.SIVisitor.instrumentSelects(Ind: &I, TotalNC: NumCounters, FNV: FuncInfo.FuncNameVar, |
914 | FHash: FuncInfo.FunctionHash); |
915 | assert(I == NumCounters); |
916 | |
917 | if (DisableValueProfiling) |
918 | return; |
919 | |
920 | NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); |
921 | |
922 | // Intrinsic function calls do not have funclet operand bundles needed for |
923 | // Windows exception handling attached to them. However, if value profiling is |
924 | // inserted for one of these calls, then a funclet value will need to be set |
925 | // on the instrumentation call based on the funclet coloring. |
926 | DenseMap<BasicBlock *, ColorVector> BlockColors; |
927 | if (F.hasPersonalityFn() && |
928 | isScopedEHPersonality(Pers: classifyEHPersonality(Pers: F.getPersonalityFn()))) |
929 | BlockColors = colorEHFunclets(F); |
930 | |
931 | // For each VP Kind, walk the VP candidates and instrument each one. |
932 | for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { |
933 | unsigned SiteIndex = 0; |
934 | if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) |
935 | continue; |
936 | |
937 | for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { |
938 | LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] |
939 | << " site: CallSite Index = " << SiteIndex << "\n" ); |
940 | |
941 | IRBuilder<> Builder(Cand.InsertPt); |
942 | assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && |
943 | "Cannot get the Instrumentation point" ); |
944 | |
945 | Value *ToProfile = nullptr; |
946 | if (Cand.V->getType()->isIntegerTy()) |
947 | ToProfile = Builder.CreateZExtOrTrunc(V: Cand.V, DestTy: Builder.getInt64Ty()); |
948 | else if (Cand.V->getType()->isPointerTy()) |
949 | ToProfile = Builder.CreatePtrToInt(V: Cand.V, DestTy: Builder.getInt64Ty()); |
950 | assert(ToProfile && "value profiling Value is of unexpected type" ); |
951 | |
952 | SmallVector<OperandBundleDef, 1> OpBundles; |
953 | populateEHOperandBundle(Cand, BlockColors, OpBundles); |
954 | Builder.CreateCall( |
955 | Intrinsic::getDeclaration(M, Intrinsic::id: instrprof_value_profile), |
956 | {FuncInfo.FuncNameVar, Builder.getInt64(C: FuncInfo.FunctionHash), |
957 | ToProfile, Builder.getInt32(C: Kind), Builder.getInt32(C: SiteIndex++)}, |
958 | OpBundles); |
959 | } |
960 | } // IPVK_First <= Kind <= IPVK_Last |
961 | } |
962 | |
963 | namespace { |
964 | |
965 | // This class represents a CFG edge in profile use compilation. |
966 | struct PGOUseEdge : public PGOEdge { |
967 | using PGOEdge::PGOEdge; |
968 | |
969 | std::optional<uint64_t> Count; |
970 | |
971 | // Set edge count value |
972 | void setEdgeCount(uint64_t Value) { Count = Value; } |
973 | |
974 | // Return the information string for this object. |
975 | std::string infoString() const { |
976 | if (!Count) |
977 | return PGOEdge::infoString(); |
978 | return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str(); |
979 | } |
980 | }; |
981 | |
982 | using DirectEdges = SmallVector<PGOUseEdge *, 2>; |
983 | |
984 | // This class stores the auxiliary information for each BB. |
985 | struct PGOUseBBInfo : public PGOBBInfo { |
986 | std::optional<uint64_t> Count; |
987 | int32_t UnknownCountInEdge = 0; |
988 | int32_t UnknownCountOutEdge = 0; |
989 | DirectEdges InEdges; |
990 | DirectEdges OutEdges; |
991 | |
992 | PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {} |
993 | |
994 | // Set the profile count value for this BB. |
995 | void setBBInfoCount(uint64_t Value) { Count = Value; } |
996 | |
997 | // Return the information string of this object. |
998 | std::string infoString() const { |
999 | if (!Count) |
1000 | return PGOBBInfo::infoString(); |
1001 | return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str(); |
1002 | } |
1003 | |
1004 | // Add an OutEdge and update the edge count. |
1005 | void addOutEdge(PGOUseEdge *E) { |
1006 | OutEdges.push_back(Elt: E); |
1007 | UnknownCountOutEdge++; |
1008 | } |
1009 | |
1010 | // Add an InEdge and update the edge count. |
1011 | void addInEdge(PGOUseEdge *E) { |
1012 | InEdges.push_back(Elt: E); |
1013 | UnknownCountInEdge++; |
1014 | } |
1015 | }; |
1016 | |
1017 | } // end anonymous namespace |
1018 | |
1019 | // Sum up the count values for all the edges. |
1020 | static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { |
1021 | uint64_t Total = 0; |
1022 | for (const auto &E : Edges) { |
1023 | if (E->Removed) |
1024 | continue; |
1025 | if (E->Count) |
1026 | Total += *E->Count; |
1027 | } |
1028 | return Total; |
1029 | } |
1030 | |
1031 | namespace { |
1032 | |
1033 | class PGOUseFunc { |
1034 | public: |
1035 | PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, |
1036 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, |
1037 | BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, |
1038 | ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry, |
1039 | bool HasSingleByteCoverage) |
1040 | : F(Func), M(Modu), BFI(BFIin), PSI(PSI), |
1041 | FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, |
1042 | InstrumentFuncEntry, HasSingleByteCoverage), |
1043 | FreqAttr(FFA_Normal), IsCS(IsCS) {} |
1044 | |
1045 | void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum); |
1046 | |
1047 | // Read counts for the instrumented BB from profile. |
1048 | bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, |
1049 | InstrProfRecord::CountPseudoKind &PseudoKind); |
1050 | |
1051 | // Populate the counts for all BBs. |
1052 | void populateCounters(); |
1053 | |
1054 | // Set block coverage based on profile coverage values. |
1055 | void populateCoverage(IndexedInstrProfReader *PGOReader); |
1056 | |
1057 | // Set the branch weights based on the count values. |
1058 | void setBranchWeights(); |
1059 | |
1060 | // Annotate the value profile call sites for all value kind. |
1061 | void annotateValueSites(); |
1062 | |
1063 | // Annotate the value profile call sites for one value kind. |
1064 | void annotateValueSites(uint32_t Kind); |
1065 | |
1066 | // Annotate the irreducible loop header weights. |
1067 | void annotateIrrLoopHeaderWeights(); |
1068 | |
1069 | // The hotness of the function from the profile count. |
1070 | enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; |
1071 | |
1072 | // Return the function hotness from the profile. |
1073 | FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } |
1074 | |
1075 | // Return the function hash. |
1076 | uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } |
1077 | |
1078 | // Return the profile record for this function; |
1079 | InstrProfRecord &getProfileRecord() { return ProfileRecord; } |
1080 | |
1081 | // Return the auxiliary BB information. |
1082 | PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const { |
1083 | return FuncInfo.getBBInfo(BB); |
1084 | } |
1085 | |
1086 | // Return the auxiliary BB information if available. |
1087 | PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const { |
1088 | return FuncInfo.findBBInfo(BB); |
1089 | } |
1090 | |
1091 | Function &getFunc() const { return F; } |
1092 | |
1093 | void dumpInfo(StringRef Str = "" ) const { FuncInfo.dumpInfo(Str); } |
1094 | |
1095 | uint64_t getProgramMaxCount() const { return ProgramMaxCount; } |
1096 | |
1097 | private: |
1098 | Function &F; |
1099 | Module *M; |
1100 | BlockFrequencyInfo *BFI; |
1101 | ProfileSummaryInfo *PSI; |
1102 | |
1103 | // This member stores the shared information with class PGOGenFunc. |
1104 | FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo; |
1105 | |
1106 | // The maximum count value in the profile. This is only used in PGO use |
1107 | // compilation. |
1108 | uint64_t ProgramMaxCount; |
1109 | |
1110 | // Position of counter that remains to be read. |
1111 | uint32_t CountPosition = 0; |
1112 | |
1113 | // Total size of the profile count for this function. |
1114 | uint32_t ProfileCountSize = 0; |
1115 | |
1116 | // ProfileRecord for this function. |
1117 | InstrProfRecord ProfileRecord; |
1118 | |
1119 | // Function hotness info derived from profile. |
1120 | FuncFreqAttr FreqAttr; |
1121 | |
1122 | // Is to use the context sensitive profile. |
1123 | bool IsCS; |
1124 | |
1125 | // Find the Instrumented BB and set the value. Return false on error. |
1126 | bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); |
1127 | |
1128 | // Set the edge counter value for the unknown edge -- there should be only |
1129 | // one unknown edge. |
1130 | void setEdgeCount(DirectEdges &Edges, uint64_t Value); |
1131 | |
1132 | // Set the hot/cold inline hints based on the count values. |
1133 | // FIXME: This function should be removed once the functionality in |
1134 | // the inliner is implemented. |
1135 | void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { |
1136 | if (PSI->isHotCount(C: EntryCount)) |
1137 | FreqAttr = FFA_Hot; |
1138 | else if (PSI->isColdCount(C: MaxCount)) |
1139 | FreqAttr = FFA_Cold; |
1140 | } |
1141 | }; |
1142 | |
1143 | } // end anonymous namespace |
1144 | |
1145 | /// Set up InEdges/OutEdges for all BBs in the MST. |
1146 | static void setupBBInfoEdges( |
1147 | const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) { |
1148 | // This is not required when there is block coverage inference. |
1149 | if (FuncInfo.BCI) |
1150 | return; |
1151 | for (const auto &E : FuncInfo.MST.allEdges()) { |
1152 | if (E->Removed) |
1153 | continue; |
1154 | const BasicBlock *SrcBB = E->SrcBB; |
1155 | const BasicBlock *DestBB = E->DestBB; |
1156 | PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(BB: SrcBB); |
1157 | PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(BB: DestBB); |
1158 | SrcInfo.addOutEdge(E: E.get()); |
1159 | DestInfo.addInEdge(E: E.get()); |
1160 | } |
1161 | } |
1162 | |
1163 | // Visit all the edges and assign the count value for the instrumented |
1164 | // edges and the BB. Return false on error. |
1165 | bool PGOUseFunc::setInstrumentedCounts( |
1166 | const std::vector<uint64_t> &CountFromProfile) { |
1167 | |
1168 | std::vector<BasicBlock *> InstrumentBBs; |
1169 | FuncInfo.getInstrumentBBs(InstrumentBBs); |
1170 | |
1171 | setupBBInfoEdges(FuncInfo); |
1172 | |
1173 | unsigned NumCounters = |
1174 | InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); |
1175 | // The number of counters here should match the number of counters |
1176 | // in profile. Return if they mismatch. |
1177 | if (NumCounters != CountFromProfile.size()) { |
1178 | return false; |
1179 | } |
1180 | auto *FuncEntry = &*F.begin(); |
1181 | |
1182 | // Set the profile count to the Instrumented BBs. |
1183 | uint32_t I = 0; |
1184 | for (BasicBlock *InstrBB : InstrumentBBs) { |
1185 | uint64_t CountValue = CountFromProfile[I++]; |
1186 | PGOUseBBInfo &Info = getBBInfo(BB: InstrBB); |
1187 | // If we reach here, we know that we have some nonzero count |
1188 | // values in this function. The entry count should not be 0. |
1189 | // Fix it if necessary. |
1190 | if (InstrBB == FuncEntry && CountValue == 0) |
1191 | CountValue = 1; |
1192 | Info.setBBInfoCount(CountValue); |
1193 | } |
1194 | ProfileCountSize = CountFromProfile.size(); |
1195 | CountPosition = I; |
1196 | |
1197 | // Set the edge count and update the count of unknown edges for BBs. |
1198 | auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { |
1199 | E->setEdgeCount(Value); |
1200 | this->getBBInfo(BB: E->SrcBB).UnknownCountOutEdge--; |
1201 | this->getBBInfo(BB: E->DestBB).UnknownCountInEdge--; |
1202 | }; |
1203 | |
1204 | // Set the profile count the Instrumented edges. There are BBs that not in |
1205 | // MST but not instrumented. Need to set the edge count value so that we can |
1206 | // populate the profile counts later. |
1207 | for (const auto &E : FuncInfo.MST.allEdges()) { |
1208 | if (E->Removed || E->InMST) |
1209 | continue; |
1210 | const BasicBlock *SrcBB = E->SrcBB; |
1211 | PGOUseBBInfo &SrcInfo = getBBInfo(BB: SrcBB); |
1212 | |
1213 | // If only one out-edge, the edge profile count should be the same as BB |
1214 | // profile count. |
1215 | if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1) |
1216 | setEdgeCount(E.get(), *SrcInfo.Count); |
1217 | else { |
1218 | const BasicBlock *DestBB = E->DestBB; |
1219 | PGOUseBBInfo &DestInfo = getBBInfo(BB: DestBB); |
1220 | // If only one in-edge, the edge profile count should be the same as BB |
1221 | // profile count. |
1222 | if (DestInfo.Count && DestInfo.InEdges.size() == 1) |
1223 | setEdgeCount(E.get(), *DestInfo.Count); |
1224 | } |
1225 | if (E->Count) |
1226 | continue; |
1227 | // E's count should have been set from profile. If not, this meenas E skips |
1228 | // the instrumentation. We set the count to 0. |
1229 | setEdgeCount(E.get(), 0); |
1230 | } |
1231 | return true; |
1232 | } |
1233 | |
1234 | // Set the count value for the unknown edge. There should be one and only one |
1235 | // unknown edge in Edges vector. |
1236 | void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { |
1237 | for (auto &E : Edges) { |
1238 | if (E->Count) |
1239 | continue; |
1240 | E->setEdgeCount(Value); |
1241 | |
1242 | getBBInfo(BB: E->SrcBB).UnknownCountOutEdge--; |
1243 | getBBInfo(BB: E->DestBB).UnknownCountInEdge--; |
1244 | return; |
1245 | } |
1246 | llvm_unreachable("Cannot find the unknown count edge" ); |
1247 | } |
1248 | |
1249 | // Emit function metadata indicating PGO profile mismatch. |
1250 | static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx) { |
1251 | const char MetadataName[] = "instr_prof_hash_mismatch" ; |
1252 | SmallVector<Metadata *, 2> Names; |
1253 | // If this metadata already exists, ignore. |
1254 | auto *Existing = F.getMetadata(KindID: LLVMContext::MD_annotation); |
1255 | if (Existing) { |
1256 | MDTuple *Tuple = cast<MDTuple>(Val: Existing); |
1257 | for (const auto &N : Tuple->operands()) { |
1258 | if (N.equalsStr(Str: MetadataName)) |
1259 | return; |
1260 | Names.push_back(Elt: N.get()); |
1261 | } |
1262 | } |
1263 | |
1264 | MDBuilder MDB(ctx); |
1265 | Names.push_back(Elt: MDB.createString(Str: MetadataName)); |
1266 | MDNode *MD = MDTuple::get(Context&: ctx, MDs: Names); |
1267 | F.setMetadata(KindID: LLVMContext::MD_annotation, Node: MD); |
1268 | } |
1269 | |
1270 | void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) { |
1271 | handleAllErrors(E: std::move(Err), Handlers: [&](const InstrProfError &IPE) { |
1272 | auto &Ctx = M->getContext(); |
1273 | auto Err = IPE.get(); |
1274 | bool SkipWarning = false; |
1275 | LLVM_DEBUG(dbgs() << "Error in reading profile for Func " |
1276 | << FuncInfo.FuncName << ": " ); |
1277 | if (Err == instrprof_error::unknown_function) { |
1278 | IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; |
1279 | SkipWarning = !PGOWarnMissing; |
1280 | LLVM_DEBUG(dbgs() << "unknown function" ); |
1281 | } else if (Err == instrprof_error::hash_mismatch || |
1282 | Err == instrprof_error::malformed) { |
1283 | IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; |
1284 | SkipWarning = |
1285 | NoPGOWarnMismatch || |
1286 | (NoPGOWarnMismatchComdatWeak && |
1287 | (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage || |
1288 | F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); |
1289 | LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash |
1290 | << " skip=" << SkipWarning << ")" ); |
1291 | // Emit function metadata indicating PGO profile mismatch. |
1292 | annotateFunctionWithHashMismatch(F, ctx&: M->getContext()); |
1293 | } |
1294 | |
1295 | LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n" ); |
1296 | if (SkipWarning) |
1297 | return; |
1298 | |
1299 | std::string Msg = |
1300 | IPE.message() + std::string(" " ) + F.getName().str() + |
1301 | std::string(" Hash = " ) + std::to_string(val: FuncInfo.FunctionHash) + |
1302 | std::string(" up to " ) + std::to_string(val: MismatchedFuncSum) + |
1303 | std::string(" count discarded" ); |
1304 | |
1305 | Ctx.diagnose( |
1306 | DI: DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); |
1307 | }); |
1308 | } |
1309 | |
1310 | // Read the profile from ProfileFileName and assign the value to the |
1311 | // instrumented BB and the edges. This function also updates ProgramMaxCount. |
1312 | // Return true if the profile are successfully read, and false on errors. |
1313 | bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, |
1314 | InstrProfRecord::CountPseudoKind &PseudoKind) { |
1315 | auto &Ctx = M->getContext(); |
1316 | uint64_t MismatchedFuncSum = 0; |
1317 | Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( |
1318 | FuncName: FuncInfo.FuncName, FuncHash: FuncInfo.FunctionHash, DeprecatedFuncName: FuncInfo.DeprecatedFuncName, |
1319 | MismatchedFuncSum: &MismatchedFuncSum); |
1320 | if (Error E = Result.takeError()) { |
1321 | handleInstrProfError(Err: std::move(E), MismatchedFuncSum); |
1322 | return false; |
1323 | } |
1324 | ProfileRecord = std::move(Result.get()); |
1325 | PseudoKind = ProfileRecord.getCountPseudoKind(); |
1326 | if (PseudoKind != InstrProfRecord::NotPseudo) { |
1327 | return true; |
1328 | } |
1329 | std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; |
1330 | |
1331 | IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; |
1332 | LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n" ); |
1333 | |
1334 | uint64_t ValueSum = 0; |
1335 | for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { |
1336 | LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n" ); |
1337 | ValueSum += CountFromProfile[I]; |
1338 | } |
1339 | AllZeros = (ValueSum == 0); |
1340 | |
1341 | LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n" ); |
1342 | |
1343 | getBBInfo(BB: nullptr).UnknownCountOutEdge = 2; |
1344 | getBBInfo(BB: nullptr).UnknownCountInEdge = 2; |
1345 | |
1346 | if (!setInstrumentedCounts(CountFromProfile)) { |
1347 | LLVM_DEBUG( |
1348 | dbgs() << "Inconsistent number of counts, skipping this function" ); |
1349 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1350 | M->getName().data(), |
1351 | Twine("Inconsistent number of counts in " ) + F.getName().str() + |
1352 | Twine(": the profile may be stale or there is a function name " |
1353 | "collision." ), |
1354 | DS_Warning)); |
1355 | return false; |
1356 | } |
1357 | ProgramMaxCount = PGOReader->getMaximumFunctionCount(UseCS: IsCS); |
1358 | return true; |
1359 | } |
1360 | |
1361 | void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) { |
1362 | uint64_t MismatchedFuncSum = 0; |
1363 | Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( |
1364 | FuncName: FuncInfo.FuncName, FuncHash: FuncInfo.FunctionHash, DeprecatedFuncName: FuncInfo.DeprecatedFuncName, |
1365 | MismatchedFuncSum: &MismatchedFuncSum); |
1366 | if (auto Err = Result.takeError()) { |
1367 | handleInstrProfError(Err: std::move(Err), MismatchedFuncSum); |
1368 | return; |
1369 | } |
1370 | IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; |
1371 | |
1372 | std::vector<uint64_t> &CountsFromProfile = Result.get().Counts; |
1373 | DenseMap<const BasicBlock *, bool> Coverage; |
1374 | unsigned Index = 0; |
1375 | for (auto &BB : F) |
1376 | if (FuncInfo.BCI->shouldInstrumentBlock(BB)) |
1377 | Coverage[&BB] = (CountsFromProfile[Index++] != 0); |
1378 | assert(Index == CountsFromProfile.size()); |
1379 | |
1380 | // For each B in InverseDependencies[A], if A is covered then B is covered. |
1381 | DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>> |
1382 | InverseDependencies; |
1383 | for (auto &BB : F) { |
1384 | for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) { |
1385 | // If Dep is covered then BB is covered. |
1386 | InverseDependencies[Dep].insert(V: &BB); |
1387 | } |
1388 | } |
1389 | |
1390 | // Infer coverage of the non-instrumented blocks using a flood-fill algorithm. |
1391 | std::stack<const BasicBlock *> CoveredBlocksToProcess; |
1392 | for (auto &[BB, IsCovered] : Coverage) |
1393 | if (IsCovered) |
1394 | CoveredBlocksToProcess.push(x: BB); |
1395 | |
1396 | while (!CoveredBlocksToProcess.empty()) { |
1397 | auto *CoveredBlock = CoveredBlocksToProcess.top(); |
1398 | assert(Coverage[CoveredBlock]); |
1399 | CoveredBlocksToProcess.pop(); |
1400 | for (auto *BB : InverseDependencies[CoveredBlock]) { |
1401 | // If CoveredBlock is covered then BB is covered. |
1402 | if (Coverage[BB]) |
1403 | continue; |
1404 | Coverage[BB] = true; |
1405 | CoveredBlocksToProcess.push(x: BB); |
1406 | } |
1407 | } |
1408 | |
1409 | // Annotate block coverage. |
1410 | MDBuilder MDB(F.getContext()); |
1411 | // We set the entry count to 10000 if the entry block is covered so that BFI |
1412 | // can propagate a fraction of this count to the other covered blocks. |
1413 | F.setEntryCount(Count: Coverage[&F.getEntryBlock()] ? 10000 : 0); |
1414 | for (auto &BB : F) { |
1415 | // For a block A and its successor B, we set the edge weight as follows: |
1416 | // If A is covered and B is covered, set weight=1. |
1417 | // If A is covered and B is uncovered, set weight=0. |
1418 | // If A is uncovered, set weight=1. |
1419 | // This setup will allow BFI to give nonzero profile counts to only covered |
1420 | // blocks. |
1421 | SmallVector<uint32_t, 4> Weights; |
1422 | for (auto *Succ : successors(BB: &BB)) |
1423 | Weights.push_back(Elt: (Coverage[Succ] || !Coverage[&BB]) ? 1 : 0); |
1424 | if (Weights.size() >= 2) |
1425 | llvm::setBranchWeights(I&: *BB.getTerminator(), Weights); |
1426 | } |
1427 | |
1428 | unsigned NumCorruptCoverage = 0; |
1429 | DominatorTree DT(F); |
1430 | LoopInfo LI(DT); |
1431 | BranchProbabilityInfo BPI(F, LI); |
1432 | BlockFrequencyInfo BFI(F, BPI, LI); |
1433 | auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> { |
1434 | if (auto C = BFI.getBlockProfileCount(BB: &BB)) |
1435 | return C == 0; |
1436 | return {}; |
1437 | }; |
1438 | LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n" ); |
1439 | for (auto &BB : F) { |
1440 | LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " " ) |
1441 | << (Coverage[&BB] ? "X " : " " ) << " " << BB.getName() |
1442 | << "\n" ); |
1443 | // In some cases it is possible to find a covered block that has no covered |
1444 | // successors, e.g., when a block calls a function that may call exit(). In |
1445 | // those cases, BFI could find its successor to be covered while BCI could |
1446 | // find its successor to be dead. |
1447 | if (Coverage[&BB] == IsBlockDead(BB).value_or(u: false)) { |
1448 | LLVM_DEBUG( |
1449 | dbgs() << "Found inconsistent block covearge for " << BB.getName() |
1450 | << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead" ) << " BFI=" |
1451 | << (IsBlockDead(BB).value() ? "Dead" : "Covered" ) << "\n" ); |
1452 | ++NumCorruptCoverage; |
1453 | } |
1454 | if (Coverage[&BB]) |
1455 | ++NumCoveredBlocks; |
1456 | } |
1457 | if (PGOVerifyBFI && NumCorruptCoverage) { |
1458 | auto &Ctx = M->getContext(); |
1459 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1460 | M->getName().data(), |
1461 | Twine("Found inconsistent block coverage for function " ) + F.getName() + |
1462 | " in " + Twine(NumCorruptCoverage) + " blocks." , |
1463 | DS_Warning)); |
1464 | } |
1465 | if (PGOViewBlockCoverageGraph) |
1466 | FuncInfo.BCI->viewBlockCoverageGraph(Coverage: &Coverage); |
1467 | } |
1468 | |
1469 | // Populate the counters from instrumented BBs to all BBs. |
1470 | // In the end of this operation, all BBs should have a valid count value. |
1471 | void PGOUseFunc::populateCounters() { |
1472 | bool Changes = true; |
1473 | unsigned NumPasses = 0; |
1474 | while (Changes) { |
1475 | NumPasses++; |
1476 | Changes = false; |
1477 | |
1478 | // For efficient traversal, it's better to start from the end as most |
1479 | // of the instrumented edges are at the end. |
1480 | for (auto &BB : reverse(C&: F)) { |
1481 | PGOUseBBInfo *UseBBInfo = findBBInfo(BB: &BB); |
1482 | if (UseBBInfo == nullptr) |
1483 | continue; |
1484 | if (!UseBBInfo->Count) { |
1485 | if (UseBBInfo->UnknownCountOutEdge == 0) { |
1486 | UseBBInfo->Count = sumEdgeCount(Edges: UseBBInfo->OutEdges); |
1487 | Changes = true; |
1488 | } else if (UseBBInfo->UnknownCountInEdge == 0) { |
1489 | UseBBInfo->Count = sumEdgeCount(Edges: UseBBInfo->InEdges); |
1490 | Changes = true; |
1491 | } |
1492 | } |
1493 | if (UseBBInfo->Count) { |
1494 | if (UseBBInfo->UnknownCountOutEdge == 1) { |
1495 | uint64_t Total = 0; |
1496 | uint64_t OutSum = sumEdgeCount(Edges: UseBBInfo->OutEdges); |
1497 | // If the one of the successor block can early terminate (no-return), |
1498 | // we can end up with situation where out edge sum count is larger as |
1499 | // the source BB's count is collected by a post-dominated block. |
1500 | if (*UseBBInfo->Count > OutSum) |
1501 | Total = *UseBBInfo->Count - OutSum; |
1502 | setEdgeCount(Edges&: UseBBInfo->OutEdges, Value: Total); |
1503 | Changes = true; |
1504 | } |
1505 | if (UseBBInfo->UnknownCountInEdge == 1) { |
1506 | uint64_t Total = 0; |
1507 | uint64_t InSum = sumEdgeCount(Edges: UseBBInfo->InEdges); |
1508 | if (*UseBBInfo->Count > InSum) |
1509 | Total = *UseBBInfo->Count - InSum; |
1510 | setEdgeCount(Edges&: UseBBInfo->InEdges, Value: Total); |
1511 | Changes = true; |
1512 | } |
1513 | } |
1514 | } |
1515 | } |
1516 | |
1517 | LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n" ); |
1518 | (void)NumPasses; |
1519 | #ifndef NDEBUG |
1520 | // Assert every BB has a valid counter. |
1521 | for (auto &BB : F) { |
1522 | auto BI = findBBInfo(BB: &BB); |
1523 | if (BI == nullptr) |
1524 | continue; |
1525 | assert(BI->Count && "BB count is not valid" ); |
1526 | } |
1527 | #endif |
1528 | uint64_t FuncEntryCount = *getBBInfo(BB: &*F.begin()).Count; |
1529 | uint64_t FuncMaxCount = FuncEntryCount; |
1530 | for (auto &BB : F) { |
1531 | auto BI = findBBInfo(BB: &BB); |
1532 | if (BI == nullptr) |
1533 | continue; |
1534 | FuncMaxCount = std::max(a: FuncMaxCount, b: *BI->Count); |
1535 | } |
1536 | |
1537 | // Fix the obviously inconsistent entry count. |
1538 | if (FuncMaxCount > 0 && FuncEntryCount == 0) |
1539 | FuncEntryCount = 1; |
1540 | F.setEntryCount(Count: ProfileCount(FuncEntryCount, Function::PCT_Real)); |
1541 | markFunctionAttributes(EntryCount: FuncEntryCount, MaxCount: FuncMaxCount); |
1542 | |
1543 | // Now annotate select instructions |
1544 | FuncInfo.SIVisitor.annotateSelects(UF: this, Ind: &CountPosition); |
1545 | assert(CountPosition == ProfileCountSize); |
1546 | |
1547 | LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile." )); |
1548 | } |
1549 | |
1550 | // Assign the scaled count values to the BB with multiple out edges. |
1551 | void PGOUseFunc::setBranchWeights() { |
1552 | // Generate MD_prof metadata for every branch instruction. |
1553 | LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() |
1554 | << " IsCS=" << IsCS << "\n" ); |
1555 | for (auto &BB : F) { |
1556 | Instruction *TI = BB.getTerminator(); |
1557 | if (TI->getNumSuccessors() < 2) |
1558 | continue; |
1559 | if (!(isa<BranchInst>(Val: TI) || isa<SwitchInst>(Val: TI) || |
1560 | isa<IndirectBrInst>(Val: TI) || isa<InvokeInst>(Val: TI) || |
1561 | isa<CallBrInst>(Val: TI))) |
1562 | continue; |
1563 | |
1564 | const PGOUseBBInfo &BBCountInfo = getBBInfo(BB: &BB); |
1565 | if (!*BBCountInfo.Count) |
1566 | continue; |
1567 | |
1568 | // We have a non-zero Branch BB. |
1569 | unsigned Size = BBCountInfo.OutEdges.size(); |
1570 | SmallVector<uint64_t, 2> EdgeCounts(Size, 0); |
1571 | uint64_t MaxCount = 0; |
1572 | for (unsigned s = 0; s < Size; s++) { |
1573 | const PGOUseEdge *E = BBCountInfo.OutEdges[s]; |
1574 | const BasicBlock *SrcBB = E->SrcBB; |
1575 | const BasicBlock *DestBB = E->DestBB; |
1576 | if (DestBB == nullptr) |
1577 | continue; |
1578 | unsigned SuccNum = GetSuccessorNumber(BB: SrcBB, Succ: DestBB); |
1579 | uint64_t EdgeCount = *E->Count; |
1580 | if (EdgeCount > MaxCount) |
1581 | MaxCount = EdgeCount; |
1582 | EdgeCounts[SuccNum] = EdgeCount; |
1583 | } |
1584 | |
1585 | if (MaxCount) |
1586 | setProfMetadata(M, TI, EdgeCounts, MaxCount); |
1587 | else { |
1588 | // A zero MaxCount can come about when we have a BB with a positive |
1589 | // count, and whose successor blocks all have 0 count. This can happen |
1590 | // when there is no exit block and the code exits via a noreturn function. |
1591 | auto &Ctx = M->getContext(); |
1592 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1593 | M->getName().data(), |
1594 | Twine("Profile in " ) + F.getName().str() + |
1595 | Twine(" partially ignored" ) + |
1596 | Twine(", possibly due to the lack of a return path." ), |
1597 | DS_Warning)); |
1598 | } |
1599 | } |
1600 | } |
1601 | |
1602 | static bool isIndirectBrTarget(BasicBlock *BB) { |
1603 | for (BasicBlock *Pred : predecessors(BB)) { |
1604 | if (isa<IndirectBrInst>(Val: Pred->getTerminator())) |
1605 | return true; |
1606 | } |
1607 | return false; |
1608 | } |
1609 | |
1610 | void PGOUseFunc::() { |
1611 | LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n" ); |
1612 | // Find irr loop headers |
1613 | for (auto &BB : F) { |
1614 | // As a heuristic also annotate indrectbr targets as they have a high chance |
1615 | // to become an irreducible loop header after the indirectbr tail |
1616 | // duplication. |
1617 | if (BFI->isIrrLoopHeader(BB: &BB) || isIndirectBrTarget(BB: &BB)) { |
1618 | Instruction *TI = BB.getTerminator(); |
1619 | const PGOUseBBInfo &BBCountInfo = getBBInfo(BB: &BB); |
1620 | setIrrLoopHeaderMetadata(M, TI, Count: *BBCountInfo.Count); |
1621 | } |
1622 | } |
1623 | } |
1624 | |
1625 | void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { |
1626 | Module *M = F.getParent(); |
1627 | IRBuilder<> Builder(&SI); |
1628 | Type *Int64Ty = Builder.getInt64Ty(); |
1629 | auto *Step = Builder.CreateZExt(V: SI.getCondition(), DestTy: Int64Ty); |
1630 | Builder.CreateCall( |
1631 | Intrinsic::getDeclaration(M, Intrinsic::id: instrprof_increment_step), |
1632 | {FuncNameVar, Builder.getInt64(C: FuncHash), Builder.getInt32(C: TotalNumCtrs), |
1633 | Builder.getInt32(C: *CurCtrIdx), Step}); |
1634 | ++(*CurCtrIdx); |
1635 | } |
1636 | |
1637 | void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { |
1638 | std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; |
1639 | assert(*CurCtrIdx < CountFromProfile.size() && |
1640 | "Out of bound access of counters" ); |
1641 | uint64_t SCounts[2]; |
1642 | SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count |
1643 | ++(*CurCtrIdx); |
1644 | uint64_t TotalCount = 0; |
1645 | auto BI = UseFunc->findBBInfo(BB: SI.getParent()); |
1646 | if (BI != nullptr) |
1647 | TotalCount = *BI->Count; |
1648 | // False Count |
1649 | SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); |
1650 | uint64_t MaxCount = std::max(a: SCounts[0], b: SCounts[1]); |
1651 | if (MaxCount) |
1652 | setProfMetadata(M: F.getParent(), TI: &SI, EdgeCounts: SCounts, MaxCount); |
1653 | } |
1654 | |
1655 | void SelectInstVisitor::visitSelectInst(SelectInst &SI) { |
1656 | if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage) |
1657 | return; |
1658 | // FIXME: do not handle this yet. |
1659 | if (SI.getCondition()->getType()->isVectorTy()) |
1660 | return; |
1661 | |
1662 | switch (Mode) { |
1663 | case VM_counting: |
1664 | NSIs++; |
1665 | return; |
1666 | case VM_instrument: |
1667 | instrumentOneSelectInst(SI); |
1668 | return; |
1669 | case VM_annotate: |
1670 | annotateOneSelectInst(SI); |
1671 | return; |
1672 | } |
1673 | |
1674 | llvm_unreachable("Unknown visiting mode" ); |
1675 | } |
1676 | |
1677 | // Traverse all valuesites and annotate the instructions for all value kind. |
1678 | void PGOUseFunc::annotateValueSites() { |
1679 | if (DisableValueProfiling) |
1680 | return; |
1681 | |
1682 | // Create the PGOFuncName meta data. |
1683 | createPGOFuncNameMetadata(F, PGOFuncName: FuncInfo.FuncName); |
1684 | |
1685 | for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) |
1686 | annotateValueSites(Kind); |
1687 | } |
1688 | |
1689 | // Annotate the instructions for a specific value kind. |
1690 | void PGOUseFunc::annotateValueSites(uint32_t Kind) { |
1691 | assert(Kind <= IPVK_Last); |
1692 | unsigned ValueSiteIndex = 0; |
1693 | auto &ValueSites = FuncInfo.ValueSites[Kind]; |
1694 | unsigned NumValueSites = ProfileRecord.getNumValueSites(ValueKind: Kind); |
1695 | if (NumValueSites != ValueSites.size()) { |
1696 | auto &Ctx = M->getContext(); |
1697 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1698 | M->getName().data(), |
1699 | Twine("Inconsistent number of value sites for " ) + |
1700 | Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"" ) + |
1701 | F.getName().str() + |
1702 | Twine("\", possibly due to the use of a stale profile." ), |
1703 | DS_Warning)); |
1704 | return; |
1705 | } |
1706 | |
1707 | for (VPCandidateInfo &I : ValueSites) { |
1708 | LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind |
1709 | << "): Index = " << ValueSiteIndex << " out of " |
1710 | << NumValueSites << "\n" ); |
1711 | annotateValueSite(M&: *M, Inst&: *I.AnnotatedInst, InstrProfR: ProfileRecord, |
1712 | ValueKind: static_cast<InstrProfValueKind>(Kind), SiteIndx: ValueSiteIndex, |
1713 | MaxMDCount: Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations |
1714 | : MaxNumAnnotations); |
1715 | ValueSiteIndex++; |
1716 | } |
1717 | } |
1718 | |
1719 | // Collect the set of members for each Comdat in module M and store |
1720 | // in ComdatMembers. |
1721 | static void collectComdatMembers( |
1722 | Module &M, |
1723 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { |
1724 | if (!DoComdatRenaming) |
1725 | return; |
1726 | for (Function &F : M) |
1727 | if (Comdat *C = F.getComdat()) |
1728 | ComdatMembers.insert(x: std::make_pair(x&: C, y: &F)); |
1729 | for (GlobalVariable &GV : M.globals()) |
1730 | if (Comdat *C = GV.getComdat()) |
1731 | ComdatMembers.insert(x: std::make_pair(x&: C, y: &GV)); |
1732 | for (GlobalAlias &GA : M.aliases()) |
1733 | if (Comdat *C = GA.getComdat()) |
1734 | ComdatMembers.insert(x: std::make_pair(x&: C, y: &GA)); |
1735 | } |
1736 | |
1737 | // Return true if we should not find instrumentation data for this function |
1738 | static bool skipPGOUse(const Function &F) { |
1739 | if (F.isDeclaration()) |
1740 | return true; |
1741 | // If there are too many critical edges, PGO might cause |
1742 | // compiler time problem. Skip PGO if the number of |
1743 | // critical edges execeed the threshold. |
1744 | unsigned NumCriticalEdges = 0; |
1745 | for (auto &BB : F) { |
1746 | const Instruction *TI = BB.getTerminator(); |
1747 | for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { |
1748 | if (isCriticalEdge(TI, SuccNum: I)) |
1749 | NumCriticalEdges++; |
1750 | } |
1751 | } |
1752 | if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) { |
1753 | LLVM_DEBUG(dbgs() << "In func " << F.getName() |
1754 | << ", NumCriticalEdges=" << NumCriticalEdges |
1755 | << " exceed the threshold. Skip PGO.\n" ); |
1756 | return true; |
1757 | } |
1758 | return false; |
1759 | } |
1760 | |
1761 | // Return true if we should not instrument this function |
1762 | static bool skipPGOGen(const Function &F) { |
1763 | if (skipPGOUse(F)) |
1764 | return true; |
1765 | if (F.hasFnAttribute(llvm::Attribute::Naked)) |
1766 | return true; |
1767 | if (F.hasFnAttribute(llvm::Attribute::NoProfile)) |
1768 | return true; |
1769 | if (F.hasFnAttribute(llvm::Attribute::SkipProfile)) |
1770 | return true; |
1771 | if (F.getInstructionCount() < PGOFunctionSizeThreshold) |
1772 | return true; |
1773 | return false; |
1774 | } |
1775 | |
1776 | static bool InstrumentAllFunctions( |
1777 | Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, |
1778 | function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, |
1779 | function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { |
1780 | // For the context-sensitve instrumentation, we should have a separated pass |
1781 | // (before LTO/ThinLTO linking) to create these variables. |
1782 | if (!IsCS) |
1783 | createIRLevelProfileFlagVar(M, /*IsCS=*/false); |
1784 | |
1785 | Triple TT(M.getTargetTriple()); |
1786 | LLVMContext &Ctx = M.getContext(); |
1787 | if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling) |
1788 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1789 | M.getName().data(), |
1790 | Twine("VTable value profiling is presently not " |
1791 | "supported for non-ELF object formats" ), |
1792 | DS_Warning)); |
1793 | std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; |
1794 | collectComdatMembers(M, ComdatMembers); |
1795 | |
1796 | for (auto &F : M) { |
1797 | if (skipPGOGen(F)) |
1798 | continue; |
1799 | auto &TLI = LookupTLI(F); |
1800 | auto *BPI = LookupBPI(F); |
1801 | auto *BFI = LookupBFI(F); |
1802 | instrumentOneFunc(F, M: &M, TLI, BPI, BFI, ComdatMembers, IsCS); |
1803 | } |
1804 | return true; |
1805 | } |
1806 | |
1807 | PreservedAnalyses |
1808 | PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &MAM) { |
1809 | createProfileFileNameVar(M, InstrProfileOutput: CSInstrName); |
1810 | // The variable in a comdat may be discarded by LTO. Ensure the declaration |
1811 | // will be retained. |
1812 | appendToCompilerUsed(M, Values: createIRLevelProfileFlagVar(M, /*IsCS=*/true)); |
1813 | PreservedAnalyses PA; |
1814 | PA.preserve<FunctionAnalysisManagerModuleProxy>(); |
1815 | PA.preserveSet<AllAnalysesOn<Function>>(); |
1816 | return PA; |
1817 | } |
1818 | |
1819 | PreservedAnalyses PGOInstrumentationGen::run(Module &M, |
1820 | ModuleAnalysisManager &MAM) { |
1821 | auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
1822 | auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { |
1823 | return FAM.getResult<TargetLibraryAnalysis>(IR&: F); |
1824 | }; |
1825 | auto LookupBPI = [&FAM](Function &F) { |
1826 | return &FAM.getResult<BranchProbabilityAnalysis>(IR&: F); |
1827 | }; |
1828 | auto LookupBFI = [&FAM](Function &F) { |
1829 | return &FAM.getResult<BlockFrequencyAnalysis>(IR&: F); |
1830 | }; |
1831 | |
1832 | if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) |
1833 | return PreservedAnalyses::all(); |
1834 | |
1835 | return PreservedAnalyses::none(); |
1836 | } |
1837 | |
1838 | // Using the ratio b/w sums of profile count values and BFI count values to |
1839 | // adjust the func entry count. |
1840 | static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, |
1841 | BranchProbabilityInfo &NBPI) { |
1842 | Function &F = Func.getFunc(); |
1843 | BlockFrequencyInfo NBFI(F, NBPI, LI); |
1844 | #ifndef NDEBUG |
1845 | auto BFIEntryCount = F.getEntryCount(); |
1846 | assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) && |
1847 | "Invalid BFI Entrycount" ); |
1848 | #endif |
1849 | auto SumCount = APFloat::getZero(Sem: APFloat::IEEEdouble()); |
1850 | auto SumBFICount = APFloat::getZero(Sem: APFloat::IEEEdouble()); |
1851 | for (auto &BBI : F) { |
1852 | uint64_t CountValue = 0; |
1853 | uint64_t BFICountValue = 0; |
1854 | if (!Func.findBBInfo(BB: &BBI)) |
1855 | continue; |
1856 | auto BFICount = NBFI.getBlockProfileCount(BB: &BBI); |
1857 | CountValue = *Func.getBBInfo(BB: &BBI).Count; |
1858 | BFICountValue = *BFICount; |
1859 | SumCount.add(RHS: APFloat(CountValue * 1.0), RM: APFloat::rmNearestTiesToEven); |
1860 | SumBFICount.add(RHS: APFloat(BFICountValue * 1.0), RM: APFloat::rmNearestTiesToEven); |
1861 | } |
1862 | if (SumCount.isZero()) |
1863 | return; |
1864 | |
1865 | assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && |
1866 | "Incorrect sum of BFI counts" ); |
1867 | if (SumBFICount.compare(RHS: SumCount) == APFloat::cmpEqual) |
1868 | return; |
1869 | double Scale = (SumCount / SumBFICount).convertToDouble(); |
1870 | if (Scale < 1.001 && Scale > 0.999) |
1871 | return; |
1872 | |
1873 | uint64_t FuncEntryCount = *Func.getBBInfo(BB: &*F.begin()).Count; |
1874 | uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; |
1875 | if (NewEntryCount == 0) |
1876 | NewEntryCount = 1; |
1877 | if (NewEntryCount != FuncEntryCount) { |
1878 | F.setEntryCount(Count: ProfileCount(NewEntryCount, Function::PCT_Real)); |
1879 | LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() |
1880 | << ", entry_count " << FuncEntryCount << " --> " |
1881 | << NewEntryCount << "\n" ); |
1882 | } |
1883 | } |
1884 | |
1885 | // Compare the profile count values with BFI count values, and print out |
1886 | // the non-matching ones. |
1887 | static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, |
1888 | BranchProbabilityInfo &NBPI, |
1889 | uint64_t HotCountThreshold, |
1890 | uint64_t ColdCountThreshold) { |
1891 | Function &F = Func.getFunc(); |
1892 | BlockFrequencyInfo NBFI(F, NBPI, LI); |
1893 | // bool PrintFunc = false; |
1894 | bool HotBBOnly = PGOVerifyHotBFI; |
1895 | StringRef Msg; |
1896 | OptimizationRemarkEmitter ORE(&F); |
1897 | |
1898 | unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; |
1899 | for (auto &BBI : F) { |
1900 | uint64_t CountValue = 0; |
1901 | uint64_t BFICountValue = 0; |
1902 | |
1903 | CountValue = Func.getBBInfo(BB: &BBI).Count.value_or(u&: CountValue); |
1904 | |
1905 | BBNum++; |
1906 | if (CountValue) |
1907 | NonZeroBBNum++; |
1908 | auto BFICount = NBFI.getBlockProfileCount(BB: &BBI); |
1909 | if (BFICount) |
1910 | BFICountValue = *BFICount; |
1911 | |
1912 | if (HotBBOnly) { |
1913 | bool rawIsHot = CountValue >= HotCountThreshold; |
1914 | bool BFIIsHot = BFICountValue >= HotCountThreshold; |
1915 | bool rawIsCold = CountValue <= ColdCountThreshold; |
1916 | bool ShowCount = false; |
1917 | if (rawIsHot && !BFIIsHot) { |
1918 | Msg = "raw-Hot to BFI-nonHot" ; |
1919 | ShowCount = true; |
1920 | } else if (rawIsCold && BFIIsHot) { |
1921 | Msg = "raw-Cold to BFI-Hot" ; |
1922 | ShowCount = true; |
1923 | } |
1924 | if (!ShowCount) |
1925 | continue; |
1926 | } else { |
1927 | if ((CountValue < PGOVerifyBFICutoff) && |
1928 | (BFICountValue < PGOVerifyBFICutoff)) |
1929 | continue; |
1930 | uint64_t Diff = (BFICountValue >= CountValue) |
1931 | ? BFICountValue - CountValue |
1932 | : CountValue - BFICountValue; |
1933 | if (Diff <= CountValue / 100 * PGOVerifyBFIRatio) |
1934 | continue; |
1935 | } |
1936 | BBMisMatchNum++; |
1937 | |
1938 | ORE.emit(RemarkBuilder: [&]() { |
1939 | OptimizationRemarkAnalysis (DEBUG_TYPE, "bfi-verify" , |
1940 | F.getSubprogram(), &BBI); |
1941 | Remark << "BB " << ore::NV("Block" , BBI.getName()) |
1942 | << " Count=" << ore::NV("Count" , CountValue) |
1943 | << " BFI_Count=" << ore::NV("Count" , BFICountValue); |
1944 | if (!Msg.empty()) |
1945 | Remark << " (" << Msg << ")" ; |
1946 | return Remark; |
1947 | }); |
1948 | } |
1949 | if (BBMisMatchNum) |
1950 | ORE.emit(RemarkBuilder: [&]() { |
1951 | return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify" , |
1952 | F.getSubprogram(), &F.getEntryBlock()) |
1953 | << "In Func " << ore::NV("Function" , F.getName()) |
1954 | << ": Num_of_BB=" << ore::NV("Count" , BBNum) |
1955 | << ", Num_of_non_zerovalue_BB=" << ore::NV("Count" , NonZeroBBNum) |
1956 | << ", Num_of_mis_matching_BB=" << ore::NV("Count" , BBMisMatchNum); |
1957 | }); |
1958 | } |
1959 | |
1960 | static bool annotateAllFunctions( |
1961 | Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, |
1962 | vfs::FileSystem &FS, |
1963 | function_ref<TargetLibraryInfo &(Function &)> LookupTLI, |
1964 | function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, |
1965 | function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, |
1966 | ProfileSummaryInfo *PSI, bool IsCS) { |
1967 | LLVM_DEBUG(dbgs() << "Read in profile counters: " ); |
1968 | auto &Ctx = M.getContext(); |
1969 | // Read the counter array from file. |
1970 | auto ReaderOrErr = IndexedInstrProfReader::create(Path: ProfileFileName, FS, |
1971 | RemappingPath: ProfileRemappingFileName); |
1972 | if (Error E = ReaderOrErr.takeError()) { |
1973 | handleAllErrors(E: std::move(E), Handlers: [&](const ErrorInfoBase &EI) { |
1974 | Ctx.diagnose( |
1975 | DI: DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); |
1976 | }); |
1977 | return false; |
1978 | } |
1979 | |
1980 | std::unique_ptr<IndexedInstrProfReader> PGOReader = |
1981 | std::move(ReaderOrErr.get()); |
1982 | if (!PGOReader) { |
1983 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile(ProfileFileName.data(), |
1984 | StringRef("Cannot get PGOReader" ))); |
1985 | return false; |
1986 | } |
1987 | if (!PGOReader->hasCSIRLevelProfile() && IsCS) |
1988 | return false; |
1989 | |
1990 | // TODO: might need to change the warning once the clang option is finalized. |
1991 | if (!PGOReader->isIRLevelProfile()) { |
1992 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1993 | ProfileFileName.data(), "Not an IR level instrumentation profile" )); |
1994 | return false; |
1995 | } |
1996 | if (PGOReader->functionEntryOnly()) { |
1997 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1998 | ProfileFileName.data(), |
1999 | "Function entry profiles are not yet supported for optimization" )); |
2000 | return false; |
2001 | } |
2002 | |
2003 | // Add the profile summary (read from the header of the indexed summary) here |
2004 | // so that we can use it below when reading counters (which checks if the |
2005 | // function should be marked with a cold or inlinehint attribute). |
2006 | M.setProfileSummary(M: PGOReader->getSummary(UseCS: IsCS).getMD(Context&: M.getContext()), |
2007 | Kind: IsCS ? ProfileSummary::PSK_CSInstr |
2008 | : ProfileSummary::PSK_Instr); |
2009 | PSI->refresh(); |
2010 | |
2011 | std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; |
2012 | collectComdatMembers(M, ComdatMembers); |
2013 | std::vector<Function *> HotFunctions; |
2014 | std::vector<Function *> ColdFunctions; |
2015 | |
2016 | // If the profile marked as always instrument the entry BB, do the |
2017 | // same. Note this can be overwritten by the internal option in CFGMST.h |
2018 | bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); |
2019 | if (PGOInstrumentEntry.getNumOccurrences() > 0) |
2020 | InstrumentFuncEntry = PGOInstrumentEntry; |
2021 | bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage(); |
2022 | for (auto &F : M) { |
2023 | if (skipPGOUse(F)) |
2024 | continue; |
2025 | auto &TLI = LookupTLI(F); |
2026 | auto *BPI = LookupBPI(F); |
2027 | auto *BFI = LookupBFI(F); |
2028 | if (!HasSingleByteCoverage) { |
2029 | // Split indirectbr critical edges here before computing the MST rather |
2030 | // than later in getInstrBB() to avoid invalidating it. |
2031 | SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, |
2032 | BFI); |
2033 | } |
2034 | PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, |
2035 | InstrumentFuncEntry, HasSingleByteCoverage); |
2036 | if (HasSingleByteCoverage) { |
2037 | Func.populateCoverage(PGOReader: PGOReader.get()); |
2038 | continue; |
2039 | } |
2040 | // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo, |
2041 | // it means the profile for the function is unrepresentative and this |
2042 | // function is actually hot / warm. We will reset the function hot / cold |
2043 | // attribute and drop all the profile counters. |
2044 | InstrProfRecord::CountPseudoKind PseudoKind = InstrProfRecord::NotPseudo; |
2045 | bool AllZeros = false; |
2046 | if (!Func.readCounters(PGOReader: PGOReader.get(), AllZeros, PseudoKind)) |
2047 | continue; |
2048 | if (AllZeros) { |
2049 | F.setEntryCount(Count: ProfileCount(0, Function::PCT_Real)); |
2050 | if (Func.getProgramMaxCount() != 0) |
2051 | ColdFunctions.push_back(x: &F); |
2052 | continue; |
2053 | } |
2054 | if (PseudoKind != InstrProfRecord::NotPseudo) { |
2055 | // Clear function attribute cold. |
2056 | if (F.hasFnAttribute(Attribute::Cold)) |
2057 | F.removeFnAttr(Attribute::Cold); |
2058 | // Set function attribute as hot. |
2059 | if (PseudoKind == InstrProfRecord::PseudoHot) |
2060 | F.addFnAttr(Attribute::Hot); |
2061 | continue; |
2062 | } |
2063 | Func.populateCounters(); |
2064 | Func.setBranchWeights(); |
2065 | Func.annotateValueSites(); |
2066 | Func.annotateIrrLoopHeaderWeights(); |
2067 | PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); |
2068 | if (FreqAttr == PGOUseFunc::FFA_Cold) |
2069 | ColdFunctions.push_back(x: &F); |
2070 | else if (FreqAttr == PGOUseFunc::FFA_Hot) |
2071 | HotFunctions.push_back(x: &F); |
2072 | if (PGOViewCounts != PGOVCT_None && |
2073 | (ViewBlockFreqFuncName.empty() || |
2074 | F.getName().equals(RHS: ViewBlockFreqFuncName))) { |
2075 | LoopInfo LI{DominatorTree(F)}; |
2076 | std::unique_ptr<BranchProbabilityInfo> NewBPI = |
2077 | std::make_unique<BranchProbabilityInfo>(args&: F, args&: LI); |
2078 | std::unique_ptr<BlockFrequencyInfo> NewBFI = |
2079 | std::make_unique<BlockFrequencyInfo>(args&: F, args&: *NewBPI, args&: LI); |
2080 | if (PGOViewCounts == PGOVCT_Graph) |
2081 | NewBFI->view(); |
2082 | else if (PGOViewCounts == PGOVCT_Text) { |
2083 | dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n" ; |
2084 | NewBFI->print(OS&: dbgs()); |
2085 | } |
2086 | } |
2087 | if (PGOViewRawCounts != PGOVCT_None && |
2088 | (ViewBlockFreqFuncName.empty() || |
2089 | F.getName().equals(RHS: ViewBlockFreqFuncName))) { |
2090 | if (PGOViewRawCounts == PGOVCT_Graph) |
2091 | if (ViewBlockFreqFuncName.empty()) |
2092 | WriteGraph(G: &Func, Name: Twine("PGORawCounts_" ) + Func.getFunc().getName()); |
2093 | else |
2094 | ViewGraph(G: &Func, Name: Twine("PGORawCounts_" ) + Func.getFunc().getName()); |
2095 | else if (PGOViewRawCounts == PGOVCT_Text) { |
2096 | dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n" ; |
2097 | Func.dumpInfo(); |
2098 | } |
2099 | } |
2100 | |
2101 | if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { |
2102 | LoopInfo LI{DominatorTree(F)}; |
2103 | BranchProbabilityInfo NBPI(F, LI); |
2104 | |
2105 | // Fix func entry count. |
2106 | if (PGOFixEntryCount) |
2107 | fixFuncEntryCount(Func, LI, NBPI); |
2108 | |
2109 | // Verify BlockFrequency information. |
2110 | uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; |
2111 | if (PGOVerifyHotBFI) { |
2112 | HotCountThreshold = PSI->getOrCompHotCountThreshold(); |
2113 | ColdCountThreshold = PSI->getOrCompColdCountThreshold(); |
2114 | } |
2115 | verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); |
2116 | } |
2117 | } |
2118 | |
2119 | // Set function hotness attribute from the profile. |
2120 | // We have to apply these attributes at the end because their presence |
2121 | // can affect the BranchProbabilityInfo of any callers, resulting in an |
2122 | // inconsistent MST between prof-gen and prof-use. |
2123 | for (auto &F : HotFunctions) { |
2124 | F->addFnAttr(Attribute::InlineHint); |
2125 | LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() |
2126 | << "\n" ); |
2127 | } |
2128 | for (auto &F : ColdFunctions) { |
2129 | // Only set when there is no Attribute::Hot set by the user. For Hot |
2130 | // attribute, user's annotation has the precedence over the profile. |
2131 | if (F->hasFnAttribute(Attribute::Hot)) { |
2132 | auto &Ctx = M.getContext(); |
2133 | std::string Msg = std::string("Function " ) + F->getName().str() + |
2134 | std::string(" is annotated as a hot function but" |
2135 | " the profile is cold" ); |
2136 | Ctx.diagnose( |
2137 | DI: DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); |
2138 | continue; |
2139 | } |
2140 | F->addFnAttr(Attribute::Cold); |
2141 | LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() |
2142 | << "\n" ); |
2143 | } |
2144 | return true; |
2145 | } |
2146 | |
2147 | PGOInstrumentationUse::PGOInstrumentationUse( |
2148 | std::string Filename, std::string RemappingFilename, bool IsCS, |
2149 | IntrusiveRefCntPtr<vfs::FileSystem> VFS) |
2150 | : ProfileFileName(std::move(Filename)), |
2151 | ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS), |
2152 | FS(std::move(VFS)) { |
2153 | if (!PGOTestProfileFile.empty()) |
2154 | ProfileFileName = PGOTestProfileFile; |
2155 | if (!PGOTestProfileRemappingFile.empty()) |
2156 | ProfileRemappingFileName = PGOTestProfileRemappingFile; |
2157 | if (!FS) |
2158 | FS = vfs::getRealFileSystem(); |
2159 | } |
2160 | |
2161 | PreservedAnalyses PGOInstrumentationUse::run(Module &M, |
2162 | ModuleAnalysisManager &MAM) { |
2163 | |
2164 | auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
2165 | auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { |
2166 | return FAM.getResult<TargetLibraryAnalysis>(IR&: F); |
2167 | }; |
2168 | auto LookupBPI = [&FAM](Function &F) { |
2169 | return &FAM.getResult<BranchProbabilityAnalysis>(IR&: F); |
2170 | }; |
2171 | auto LookupBFI = [&FAM](Function &F) { |
2172 | return &FAM.getResult<BlockFrequencyAnalysis>(IR&: F); |
2173 | }; |
2174 | |
2175 | auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(IR&: M); |
2176 | |
2177 | if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, FS&: *FS, |
2178 | LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) |
2179 | return PreservedAnalyses::all(); |
2180 | |
2181 | return PreservedAnalyses::none(); |
2182 | } |
2183 | |
2184 | static std::string getSimpleNodeName(const BasicBlock *Node) { |
2185 | if (!Node->getName().empty()) |
2186 | return Node->getName().str(); |
2187 | |
2188 | std::string SimpleNodeName; |
2189 | raw_string_ostream OS(SimpleNodeName); |
2190 | Node->printAsOperand(O&: OS, PrintType: false); |
2191 | return OS.str(); |
2192 | } |
2193 | |
2194 | void llvm::setProfMetadata(Module *M, Instruction *TI, |
2195 | ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) { |
2196 | assert(MaxCount > 0 && "Bad max count" ); |
2197 | uint64_t Scale = calculateCountScale(MaxCount); |
2198 | SmallVector<unsigned, 4> Weights; |
2199 | for (const auto &ECI : EdgeCounts) |
2200 | Weights.push_back(Elt: scaleBranchCount(Count: ECI, Scale)); |
2201 | |
2202 | LLVM_DEBUG(dbgs() << "Weight is: " ; for (const auto &W |
2203 | : Weights) { |
2204 | dbgs() << W << " " ; |
2205 | } dbgs() << "\n" ;); |
2206 | |
2207 | misexpect::checkExpectAnnotations(I&: *TI, ExistingWeights: Weights, /*IsFrontend=*/false); |
2208 | |
2209 | setBranchWeights(I&: *TI, Weights); |
2210 | if (EmitBranchProbability) { |
2211 | std::string BrCondStr = getBranchCondString(TI); |
2212 | if (BrCondStr.empty()) |
2213 | return; |
2214 | |
2215 | uint64_t WSum = |
2216 | std::accumulate(first: Weights.begin(), last: Weights.end(), init: (uint64_t)0, |
2217 | binary_op: [](uint64_t w1, uint64_t w2) { return w1 + w2; }); |
2218 | uint64_t TotalCount = |
2219 | std::accumulate(first: EdgeCounts.begin(), last: EdgeCounts.end(), init: (uint64_t)0, |
2220 | binary_op: [](uint64_t c1, uint64_t c2) { return c1 + c2; }); |
2221 | Scale = calculateCountScale(MaxCount: WSum); |
2222 | BranchProbability BP(scaleBranchCount(Count: Weights[0], Scale), |
2223 | scaleBranchCount(Count: WSum, Scale)); |
2224 | std::string BranchProbStr; |
2225 | raw_string_ostream OS(BranchProbStr); |
2226 | OS << BP; |
2227 | OS << " (total count : " << TotalCount << ")" ; |
2228 | OS.flush(); |
2229 | Function *F = TI->getParent()->getParent(); |
2230 | OptimizationRemarkEmitter ORE(F); |
2231 | ORE.emit(RemarkBuilder: [&]() { |
2232 | return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation" , TI) |
2233 | << BrCondStr << " is true with probability : " << BranchProbStr; |
2234 | }); |
2235 | } |
2236 | } |
2237 | |
2238 | namespace llvm { |
2239 | |
2240 | void (Module *M, Instruction *TI, uint64_t Count) { |
2241 | MDBuilder MDB(M->getContext()); |
2242 | TI->setMetadata(KindID: llvm::LLVMContext::MD_irr_loop, |
2243 | Node: MDB.createIrrLoopHeaderWeight(Weight: Count)); |
2244 | } |
2245 | |
2246 | template <> struct GraphTraits<PGOUseFunc *> { |
2247 | using NodeRef = const BasicBlock *; |
2248 | using ChildIteratorType = const_succ_iterator; |
2249 | using nodes_iterator = pointer_iterator<Function::const_iterator>; |
2250 | |
2251 | static NodeRef getEntryNode(const PGOUseFunc *G) { |
2252 | return &G->getFunc().front(); |
2253 | } |
2254 | |
2255 | static ChildIteratorType child_begin(const NodeRef N) { |
2256 | return succ_begin(BB: N); |
2257 | } |
2258 | |
2259 | static ChildIteratorType child_end(const NodeRef N) { return succ_end(BB: N); } |
2260 | |
2261 | static nodes_iterator nodes_begin(const PGOUseFunc *G) { |
2262 | return nodes_iterator(G->getFunc().begin()); |
2263 | } |
2264 | |
2265 | static nodes_iterator nodes_end(const PGOUseFunc *G) { |
2266 | return nodes_iterator(G->getFunc().end()); |
2267 | } |
2268 | }; |
2269 | |
2270 | template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { |
2271 | explicit DOTGraphTraits(bool isSimple = false) |
2272 | : DefaultDOTGraphTraits(isSimple) {} |
2273 | |
2274 | static std::string getGraphName(const PGOUseFunc *G) { |
2275 | return std::string(G->getFunc().getName()); |
2276 | } |
2277 | |
2278 | std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { |
2279 | std::string Result; |
2280 | raw_string_ostream OS(Result); |
2281 | |
2282 | OS << getSimpleNodeName(Node) << ":\\l" ; |
2283 | PGOUseBBInfo *BI = Graph->findBBInfo(BB: Node); |
2284 | OS << "Count : " ; |
2285 | if (BI && BI->Count) |
2286 | OS << *BI->Count << "\\l" ; |
2287 | else |
2288 | OS << "Unknown\\l" ; |
2289 | |
2290 | if (!PGOInstrSelect) |
2291 | return Result; |
2292 | |
2293 | for (const Instruction &I : *Node) { |
2294 | if (!isa<SelectInst>(Val: &I)) |
2295 | continue; |
2296 | // Display scaled counts for SELECT instruction: |
2297 | OS << "SELECT : { T = " ; |
2298 | uint64_t TC, FC; |
2299 | bool HasProf = extractBranchWeights(I, TrueVal&: TC, FalseVal&: FC); |
2300 | if (!HasProf) |
2301 | OS << "Unknown, F = Unknown }\\l" ; |
2302 | else |
2303 | OS << TC << ", F = " << FC << " }\\l" ; |
2304 | } |
2305 | return Result; |
2306 | } |
2307 | }; |
2308 | |
2309 | } // end namespace llvm |
2310 | |