1 | //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | //===----------------------------------------------------------------------===// |
10 | |
11 | #include "polly/CodeGen/PerfMonitor.h" |
12 | #include "polly/CodeGen/RuntimeDebugBuilder.h" |
13 | #include "polly/ScopInfo.h" |
14 | #include "llvm/ADT/Twine.h" |
15 | #include "llvm/IR/IntrinsicsX86.h" |
16 | #include "llvm/TargetParser/Triple.h" |
17 | |
18 | using namespace llvm; |
19 | using namespace polly; |
20 | |
21 | Function *PerfMonitor::getAtExit() { |
22 | const char *Name = "atexit" ; |
23 | Function *F = M->getFunction(Name); |
24 | |
25 | if (!F) { |
26 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
27 | FunctionType *Ty = |
28 | FunctionType::get(Result: Builder.getInt32Ty(), Params: {Builder.getPtrTy()}, isVarArg: false); |
29 | F = Function::Create(Ty, Linkage, N: Name, M); |
30 | } |
31 | |
32 | return F; |
33 | } |
34 | |
35 | void PerfMonitor::addToGlobalConstructors(Function *Fn) { |
36 | const char *Name = "llvm.global_ctors" ; |
37 | GlobalVariable *GV = M->getGlobalVariable(Name); |
38 | std::vector<Constant *> V; |
39 | |
40 | if (GV) { |
41 | Constant *Array = GV->getInitializer(); |
42 | for (Value *X : Array->operand_values()) |
43 | V.push_back(x: cast<Constant>(Val: X)); |
44 | GV->eraseFromParent(); |
45 | } |
46 | |
47 | StructType *ST = |
48 | StructType::get(elt1: Builder.getInt32Ty(), elts: Fn->getType(), elts: Builder.getPtrTy()); |
49 | |
50 | V.push_back( |
51 | x: ConstantStruct::get(T: ST, Vs: Builder.getInt32(C: 10), Vs: Fn, |
52 | Vs: ConstantPointerNull::get(T: Builder.getPtrTy()))); |
53 | ArrayType *Ty = ArrayType::get(ElementType: ST, NumElements: V.size()); |
54 | |
55 | GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage, |
56 | ConstantArray::get(T: Ty, V), Name, nullptr, |
57 | GlobalVariable::NotThreadLocal); |
58 | } |
59 | |
60 | Function *PerfMonitor::getRDTSCP() { |
61 | return Intrinsic::getDeclaration(M, Intrinsic::id: x86_rdtscp); |
62 | } |
63 | |
64 | PerfMonitor::PerfMonitor(const Scop &S, Module *M) |
65 | : M(M), Builder(M->getContext()), S(S) { |
66 | if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64) |
67 | Supported = true; |
68 | else |
69 | Supported = false; |
70 | } |
71 | |
72 | static void TryRegisterGlobal(Module *M, const char *Name, |
73 | Constant *InitialValue, Value **Location) { |
74 | *Location = M->getGlobalVariable(Name); |
75 | |
76 | if (!*Location) |
77 | *Location = new GlobalVariable( |
78 | *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage, |
79 | InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel); |
80 | } |
81 | |
82 | // Generate a unique name that is usable as a LLVM name for a scop to name its |
83 | // performance counter. |
84 | static std::string GetScopUniqueVarname(const Scop &S) { |
85 | std::string EntryString, ExitString; |
86 | std::tie(args&: EntryString, args&: ExitString) = S.getEntryExitStr(); |
87 | |
88 | return (Twine("__polly_perf_in_" ) + S.getFunction().getName() + "_from__" + |
89 | EntryString + "__to__" + ExitString) |
90 | .str(); |
91 | } |
92 | |
93 | void PerfMonitor::addScopCounter() { |
94 | const std::string varname = GetScopUniqueVarname(S); |
95 | TryRegisterGlobal(M, Name: (varname + "_cycles" ).c_str(), InitialValue: Builder.getInt64(C: 0), |
96 | Location: &CyclesInCurrentScopPtr); |
97 | |
98 | TryRegisterGlobal(M, Name: (varname + "_trip_count" ).c_str(), InitialValue: Builder.getInt64(C: 0), |
99 | Location: &TripCountForCurrentScopPtr); |
100 | } |
101 | |
102 | void PerfMonitor::addGlobalVariables() { |
103 | TryRegisterGlobal(M, Name: "__polly_perf_cycles_total_start" , InitialValue: Builder.getInt64(C: 0), |
104 | Location: &CyclesTotalStartPtr); |
105 | |
106 | TryRegisterGlobal(M, Name: "__polly_perf_initialized" , InitialValue: Builder.getInt1(V: false), |
107 | Location: &AlreadyInitializedPtr); |
108 | |
109 | TryRegisterGlobal(M, Name: "__polly_perf_cycles_in_scops" , InitialValue: Builder.getInt64(C: 0), |
110 | Location: &CyclesInScopsPtr); |
111 | |
112 | TryRegisterGlobal(M, Name: "__polly_perf_cycles_in_scop_start" , InitialValue: Builder.getInt64(C: 0), |
113 | Location: &CyclesInScopStartPtr); |
114 | } |
115 | |
// Names of the two functions this file emits: the initializer that is added
// to the global constructors, and the function that prints the final report.
static const char *InitFunctionName = "__polly_perf_init";
static const char *FinalReportingFunctionName = "__polly_perf_final";

// Entry block of the final reporting function and the `ret` that currently
// terminates it. Both are set by PerfMonitor::insertFinalReporting;
// PerfMonitor::AppendScopReporting replaces the `ret` each time it appends a
// per-scop line.
//
// NOTE(review): these are file-scope statics and hence shared by every
// PerfMonitor instance in the process. Nothing visible here resets them when
// a different Module is processed -- confirm that is never the case.
static BasicBlock *FinalStartBB = nullptr;
static ReturnInst *ReturnFromFinal = nullptr;
121 | |
122 | Function *PerfMonitor::insertFinalReporting() { |
123 | // Create new function. |
124 | GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage; |
125 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params: {}, isVarArg: false); |
126 | Function *ExitFn = |
127 | Function::Create(Ty, Linkage, N: FinalReportingFunctionName, M); |
128 | FinalStartBB = BasicBlock::Create(Context&: M->getContext(), Name: "start" , Parent: ExitFn); |
129 | Builder.SetInsertPoint(FinalStartBB); |
130 | |
131 | if (!Supported) { |
132 | RuntimeDebugBuilder::createCPUPrinter( |
133 | Builder, args: "Polly runtime information generation not supported\n" ); |
134 | Builder.CreateRetVoid(); |
135 | return ExitFn; |
136 | } |
137 | |
138 | // Measure current cycles and compute final timings. |
139 | Function *RDTSCPFn = getRDTSCP(); |
140 | |
141 | Type *Int64Ty = Builder.getInt64Ty(); |
142 | Value *CurrentCycles = |
143 | Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0}); |
144 | Value *CyclesStart = Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesTotalStartPtr, isVolatile: true); |
145 | Value *CyclesTotal = Builder.CreateSub(LHS: CurrentCycles, RHS: CyclesStart); |
146 | Value *CyclesInScops = Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInScopsPtr, isVolatile: true); |
147 | |
148 | // Print the runtime information. |
149 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Polly runtime information\n" ); |
150 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "-------------------------\n" ); |
151 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Total: " , args: CyclesTotal, args: "\n" ); |
152 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Scops: " , args: CyclesInScops, |
153 | args: "\n" ); |
154 | |
155 | // Print the preamble for per-scop information. |
156 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "\n" ); |
157 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Per SCoP information\n" ); |
158 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "--------------------\n" ); |
159 | |
160 | RuntimeDebugBuilder::createCPUPrinter( |
161 | Builder, args: "scop function, " |
162 | "entry block name, exit block name, total time, trip count\n" ); |
163 | ReturnFromFinal = Builder.CreateRetVoid(); |
164 | return ExitFn; |
165 | } |
166 | |
167 | void PerfMonitor::AppendScopReporting() { |
168 | if (!Supported) |
169 | return; |
170 | |
171 | assert(FinalStartBB && "Expected FinalStartBB to be initialized by " |
172 | "PerfMonitor::insertFinalReporting." ); |
173 | assert(ReturnFromFinal && "Expected ReturnFromFinal to be initialized by " |
174 | "PerfMonitor::insertFinalReporting." ); |
175 | |
176 | Builder.SetInsertPoint(FinalStartBB); |
177 | ReturnFromFinal->eraseFromParent(); |
178 | |
179 | Type *Int64Ty = Builder.getInt64Ty(); |
180 | Value *CyclesInCurrentScop = |
181 | Builder.CreateLoad(Ty: Int64Ty, Ptr: this->CyclesInCurrentScopPtr, isVolatile: true); |
182 | |
183 | Value *TripCountForCurrentScop = |
184 | Builder.CreateLoad(Ty: Int64Ty, Ptr: this->TripCountForCurrentScopPtr, isVolatile: true); |
185 | |
186 | std::string EntryName, ExitName; |
187 | std::tie(args&: EntryName, args&: ExitName) = S.getEntryExitStr(); |
188 | |
189 | // print in CSV for easy parsing with other tools. |
190 | RuntimeDebugBuilder::createCPUPrinter( |
191 | Builder, args: S.getFunction().getName(), args: ", " , args: EntryName, args: ", " , args: ExitName, args: ", " , |
192 | args: CyclesInCurrentScop, args: ", " , args: TripCountForCurrentScop, args: "\n" ); |
193 | |
194 | ReturnFromFinal = Builder.CreateRetVoid(); |
195 | } |
196 | |
197 | static Function *FinalReporting = nullptr; |
198 | |
199 | void PerfMonitor::initialize() { |
200 | addGlobalVariables(); |
201 | addScopCounter(); |
202 | |
203 | // Ensure that we only add the final reporting function once. |
204 | // On later invocations, append to the reporting function. |
205 | if (!FinalReporting) { |
206 | FinalReporting = insertFinalReporting(); |
207 | |
208 | Function *InitFn = insertInitFunction(FinalReporting); |
209 | addToGlobalConstructors(Fn: InitFn); |
210 | } |
211 | |
212 | AppendScopReporting(); |
213 | } |
214 | |
215 | Function *PerfMonitor::insertInitFunction(Function *FinalReporting) { |
216 | // Insert function definition and BBs. |
217 | GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage; |
218 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params: {}, isVarArg: false); |
219 | Function *InitFn = Function::Create(Ty, Linkage, N: InitFunctionName, M); |
220 | BasicBlock *Start = BasicBlock::Create(Context&: M->getContext(), Name: "start" , Parent: InitFn); |
221 | BasicBlock *EarlyReturn = |
222 | BasicBlock::Create(Context&: M->getContext(), Name: "earlyreturn" , Parent: InitFn); |
223 | BasicBlock *InitBB = BasicBlock::Create(Context&: M->getContext(), Name: "initbb" , Parent: InitFn); |
224 | |
225 | Builder.SetInsertPoint(Start); |
226 | |
227 | // Check if this function was already run. If yes, return. |
228 | // |
229 | // In case profiling has been enabled in multiple translation units, the |
230 | // initializer function will be added to the global constructors list of |
231 | // each translation unit. When merging translation units, the global |
232 | // constructor lists are just appended, such that the initializer will appear |
233 | // multiple times. To avoid initializations being run multiple times (and |
234 | // especially to avoid that atExitFn is called more than once), we bail |
235 | // out if the initializer is run more than once. |
236 | Value *HasRunBefore = |
237 | Builder.CreateLoad(Ty: Builder.getInt1Ty(), Ptr: AlreadyInitializedPtr); |
238 | Builder.CreateCondBr(Cond: HasRunBefore, True: EarlyReturn, False: InitBB); |
239 | Builder.SetInsertPoint(EarlyReturn); |
240 | Builder.CreateRetVoid(); |
241 | |
242 | // Keep track that this function has been run once. |
243 | Builder.SetInsertPoint(InitBB); |
244 | Value *True = Builder.getInt1(V: true); |
245 | Builder.CreateStore(Val: True, Ptr: AlreadyInitializedPtr); |
246 | |
247 | // Register the final reporting function with atexit(). |
248 | Value *FinalReportingPtr = |
249 | Builder.CreatePointerCast(V: FinalReporting, DestTy: Builder.getPtrTy()); |
250 | Function *AtExitFn = getAtExit(); |
251 | Builder.CreateCall(Callee: AtExitFn, Args: {FinalReportingPtr}); |
252 | |
253 | if (Supported) { |
254 | // Read the currently cycle counter and store the result for later. |
255 | Function *RDTSCPFn = getRDTSCP(); |
256 | Value *CurrentCycles = |
257 | Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0}); |
258 | Builder.CreateStore(Val: CurrentCycles, Ptr: CyclesTotalStartPtr, isVolatile: true); |
259 | } |
260 | Builder.CreateRetVoid(); |
261 | |
262 | return InitFn; |
263 | } |
264 | |
265 | void PerfMonitor::insertRegionStart(Instruction *InsertBefore) { |
266 | if (!Supported) |
267 | return; |
268 | |
269 | Builder.SetInsertPoint(InsertBefore); |
270 | Function *RDTSCPFn = getRDTSCP(); |
271 | Value *CurrentCycles = |
272 | Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0}); |
273 | Builder.CreateStore(Val: CurrentCycles, Ptr: CyclesInScopStartPtr, isVolatile: true); |
274 | } |
275 | |
276 | void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) { |
277 | if (!Supported) |
278 | return; |
279 | |
280 | Builder.SetInsertPoint(InsertBefore); |
281 | Function *RDTSCPFn = getRDTSCP(); |
282 | Type *Int64Ty = Builder.getInt64Ty(); |
283 | LoadInst *CyclesStart = |
284 | Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInScopStartPtr, isVolatile: true); |
285 | Value *CurrentCycles = |
286 | Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0}); |
287 | Value *CyclesInScop = Builder.CreateSub(LHS: CurrentCycles, RHS: CyclesStart); |
288 | Value *CyclesInScops = Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInScopsPtr, isVolatile: true); |
289 | CyclesInScops = Builder.CreateAdd(LHS: CyclesInScops, RHS: CyclesInScop); |
290 | Builder.CreateStore(Val: CyclesInScops, Ptr: CyclesInScopsPtr, isVolatile: true); |
291 | |
292 | Value *CyclesInCurrentScop = |
293 | Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInCurrentScopPtr, isVolatile: true); |
294 | CyclesInCurrentScop = Builder.CreateAdd(LHS: CyclesInCurrentScop, RHS: CyclesInScop); |
295 | Builder.CreateStore(Val: CyclesInCurrentScop, Ptr: CyclesInCurrentScopPtr, isVolatile: true); |
296 | |
297 | Value *TripCountForCurrentScop = |
298 | Builder.CreateLoad(Ty: Int64Ty, Ptr: TripCountForCurrentScopPtr, isVolatile: true); |
299 | TripCountForCurrentScop = |
300 | Builder.CreateAdd(LHS: TripCountForCurrentScop, RHS: Builder.getInt64(C: 1)); |
301 | Builder.CreateStore(Val: TripCountForCurrentScop, Ptr: TripCountForCurrentScopPtr, |
302 | isVolatile: true); |
303 | } |
304 | |