//===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
10 | |
11 | #include "polly/CodeGen/PerfMonitor.h" |
12 | #include "polly/CodeGen/RuntimeDebugBuilder.h" |
13 | #include "polly/ScopInfo.h" |
14 | #include "llvm/ADT/Twine.h" |
15 | #include "llvm/IR/IntrinsicsX86.h" |
16 | #include "llvm/IR/Module.h" |
17 | #include "llvm/TargetParser/Triple.h" |
18 | |
19 | using namespace llvm; |
20 | using namespace polly; |
21 | |
22 | Function *PerfMonitor::getAtExit() { |
23 | const char *Name = "atexit" ; |
24 | Function *F = M->getFunction(Name); |
25 | |
26 | if (!F) { |
27 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
28 | FunctionType *Ty = |
29 | FunctionType::get(Result: Builder.getInt32Ty(), Params: {Builder.getPtrTy()}, isVarArg: false); |
30 | F = Function::Create(Ty, Linkage, N: Name, M); |
31 | } |
32 | |
33 | return F; |
34 | } |
35 | |
36 | void PerfMonitor::addToGlobalConstructors(Function *Fn) { |
37 | const char *Name = "llvm.global_ctors" ; |
38 | GlobalVariable *GV = M->getGlobalVariable(Name); |
39 | std::vector<Constant *> V; |
40 | |
41 | if (GV) { |
42 | Constant *Array = GV->getInitializer(); |
43 | for (Value *X : Array->operand_values()) |
44 | V.push_back(x: cast<Constant>(Val: X)); |
45 | GV->eraseFromParent(); |
46 | } |
47 | |
48 | StructType *ST = |
49 | StructType::get(elt1: Builder.getInt32Ty(), elts: Fn->getType(), elts: Builder.getPtrTy()); |
50 | |
51 | V.push_back( |
52 | x: ConstantStruct::get(T: ST, Vs: Builder.getInt32(C: 10), Vs: Fn, |
53 | Vs: ConstantPointerNull::get(T: Builder.getPtrTy()))); |
54 | ArrayType *Ty = ArrayType::get(ElementType: ST, NumElements: V.size()); |
55 | |
56 | GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage, |
57 | ConstantArray::get(T: Ty, V), Name, nullptr, |
58 | GlobalVariable::NotThreadLocal); |
59 | } |
60 | |
61 | Function *PerfMonitor::getRDTSCP() { |
62 | return Intrinsic::getOrInsertDeclaration(M, Intrinsic::id: x86_rdtscp); |
63 | } |
64 | |
65 | PerfMonitor::PerfMonitor(const Scop &S, Module *M) |
66 | : M(M), Builder(M->getContext()), S(S) { |
67 | if (M->getTargetTriple().getArch() == llvm::Triple::x86_64) |
68 | Supported = true; |
69 | else |
70 | Supported = false; |
71 | } |
72 | |
73 | static void TryRegisterGlobal(Module *M, const char *Name, |
74 | Constant *InitialValue, Value **Location) { |
75 | *Location = M->getGlobalVariable(Name); |
76 | |
77 | if (!*Location) |
78 | *Location = new GlobalVariable( |
79 | *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage, |
80 | InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel); |
81 | } |
82 | |
83 | // Generate a unique name that is usable as a LLVM name for a scop to name its |
84 | // performance counter. |
85 | static std::string GetScopUniqueVarname(const Scop &S) { |
86 | std::string EntryString, ExitString; |
87 | std::tie(args&: EntryString, args&: ExitString) = S.getEntryExitStr(); |
88 | |
89 | return (Twine("__polly_perf_in_" ) + S.getFunction().getName() + "_from__" + |
90 | EntryString + "__to__" + ExitString) |
91 | .str(); |
92 | } |
93 | |
94 | void PerfMonitor::addScopCounter() { |
95 | const std::string varname = GetScopUniqueVarname(S); |
96 | TryRegisterGlobal(M, Name: (varname + "_cycles" ).c_str(), InitialValue: Builder.getInt64(C: 0), |
97 | Location: &CyclesInCurrentScopPtr); |
98 | |
99 | TryRegisterGlobal(M, Name: (varname + "_trip_count" ).c_str(), InitialValue: Builder.getInt64(C: 0), |
100 | Location: &TripCountForCurrentScopPtr); |
101 | } |
102 | |
103 | void PerfMonitor::addGlobalVariables() { |
104 | TryRegisterGlobal(M, Name: "__polly_perf_cycles_total_start" , InitialValue: Builder.getInt64(C: 0), |
105 | Location: &CyclesTotalStartPtr); |
106 | |
107 | TryRegisterGlobal(M, Name: "__polly_perf_initialized" , InitialValue: Builder.getInt1(V: false), |
108 | Location: &AlreadyInitializedPtr); |
109 | |
110 | TryRegisterGlobal(M, Name: "__polly_perf_cycles_in_scops" , InitialValue: Builder.getInt64(C: 0), |
111 | Location: &CyclesInScopsPtr); |
112 | |
113 | TryRegisterGlobal(M, Name: "__polly_perf_cycles_in_scop_start" , InitialValue: Builder.getInt64(C: 0), |
114 | Location: &CyclesInScopStartPtr); |
115 | } |
116 | |
117 | static const char *InitFunctionName = "__polly_perf_init" ; |
118 | static const char *FinalReportingFunctionName = "__polly_perf_final" ; |
119 | |
120 | static BasicBlock *FinalStartBB = nullptr; |
121 | static ReturnInst *ReturnFromFinal = nullptr; |
122 | |
123 | Function *PerfMonitor::insertFinalReporting() { |
124 | // Create new function. |
125 | GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage; |
126 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params: {}, isVarArg: false); |
127 | Function *ExitFn = |
128 | Function::Create(Ty, Linkage, N: FinalReportingFunctionName, M); |
129 | FinalStartBB = BasicBlock::Create(Context&: M->getContext(), Name: "start" , Parent: ExitFn); |
130 | Builder.SetInsertPoint(FinalStartBB); |
131 | |
132 | if (!Supported) { |
133 | RuntimeDebugBuilder::createCPUPrinter( |
134 | Builder, args: "Polly runtime information generation not supported\n" ); |
135 | Builder.CreateRetVoid(); |
136 | return ExitFn; |
137 | } |
138 | |
139 | // Measure current cycles and compute final timings. |
140 | Function *RDTSCPFn = getRDTSCP(); |
141 | |
142 | Type *Int64Ty = Builder.getInt64Ty(); |
143 | Value *CurrentCycles = |
144 | Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0}); |
145 | Value *CyclesStart = Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesTotalStartPtr, isVolatile: true); |
146 | Value *CyclesTotal = Builder.CreateSub(LHS: CurrentCycles, RHS: CyclesStart); |
147 | Value *CyclesInScops = Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInScopsPtr, isVolatile: true); |
148 | |
149 | // Print the runtime information. |
150 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Polly runtime information\n" ); |
151 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "-------------------------\n" ); |
152 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Total: " , args: CyclesTotal, args: "\n" ); |
153 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Scops: " , args: CyclesInScops, |
154 | args: "\n" ); |
155 | |
156 | // Print the preamble for per-scop information. |
157 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "\n" ); |
158 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Per SCoP information\n" ); |
159 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "--------------------\n" ); |
160 | |
161 | RuntimeDebugBuilder::createCPUPrinter( |
162 | Builder, args: "scop function, " |
163 | "entry block name, exit block name, total time, trip count\n" ); |
164 | ReturnFromFinal = Builder.CreateRetVoid(); |
165 | return ExitFn; |
166 | } |
167 | |
168 | void PerfMonitor::AppendScopReporting() { |
169 | if (!Supported) |
170 | return; |
171 | |
172 | assert(FinalStartBB && "Expected FinalStartBB to be initialized by " |
173 | "PerfMonitor::insertFinalReporting." ); |
174 | assert(ReturnFromFinal && "Expected ReturnFromFinal to be initialized by " |
175 | "PerfMonitor::insertFinalReporting." ); |
176 | |
177 | Builder.SetInsertPoint(FinalStartBB); |
178 | ReturnFromFinal->eraseFromParent(); |
179 | |
180 | Type *Int64Ty = Builder.getInt64Ty(); |
181 | Value *CyclesInCurrentScop = |
182 | Builder.CreateLoad(Ty: Int64Ty, Ptr: this->CyclesInCurrentScopPtr, isVolatile: true); |
183 | |
184 | Value *TripCountForCurrentScop = |
185 | Builder.CreateLoad(Ty: Int64Ty, Ptr: this->TripCountForCurrentScopPtr, isVolatile: true); |
186 | |
187 | std::string EntryName, ExitName; |
188 | std::tie(args&: EntryName, args&: ExitName) = S.getEntryExitStr(); |
189 | |
190 | // print in CSV for easy parsing with other tools. |
191 | RuntimeDebugBuilder::createCPUPrinter( |
192 | Builder, args: S.getFunction().getName(), args: ", " , args: EntryName, args: ", " , args: ExitName, args: ", " , |
193 | args: CyclesInCurrentScop, args: ", " , args: TripCountForCurrentScop, args: "\n" ); |
194 | |
195 | ReturnFromFinal = Builder.CreateRetVoid(); |
196 | } |
197 | |
198 | static Function *FinalReporting = nullptr; |
199 | |
200 | void PerfMonitor::initialize() { |
201 | addGlobalVariables(); |
202 | addScopCounter(); |
203 | |
204 | // Ensure that we only add the final reporting function once. |
205 | // On later invocations, append to the reporting function. |
206 | if (!FinalReporting) { |
207 | FinalReporting = insertFinalReporting(); |
208 | |
209 | Function *InitFn = insertInitFunction(FinalReporting); |
210 | addToGlobalConstructors(Fn: InitFn); |
211 | } |
212 | |
213 | AppendScopReporting(); |
214 | } |
215 | |
216 | Function *PerfMonitor::insertInitFunction(Function *FinalReporting) { |
217 | // Insert function definition and BBs. |
218 | GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage; |
219 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params: {}, isVarArg: false); |
220 | Function *InitFn = Function::Create(Ty, Linkage, N: InitFunctionName, M); |
221 | BasicBlock *Start = BasicBlock::Create(Context&: M->getContext(), Name: "start" , Parent: InitFn); |
222 | BasicBlock *EarlyReturn = |
223 | BasicBlock::Create(Context&: M->getContext(), Name: "earlyreturn" , Parent: InitFn); |
224 | BasicBlock *InitBB = BasicBlock::Create(Context&: M->getContext(), Name: "initbb" , Parent: InitFn); |
225 | |
226 | Builder.SetInsertPoint(Start); |
227 | |
228 | // Check if this function was already run. If yes, return. |
229 | // |
230 | // In case profiling has been enabled in multiple translation units, the |
231 | // initializer function will be added to the global constructors list of |
232 | // each translation unit. When merging translation units, the global |
233 | // constructor lists are just appended, such that the initializer will appear |
234 | // multiple times. To avoid initializations being run multiple times (and |
235 | // especially to avoid that atExitFn is called more than once), we bail |
236 | // out if the initializer is run more than once. |
237 | Value *HasRunBefore = |
238 | Builder.CreateLoad(Ty: Builder.getInt1Ty(), Ptr: AlreadyInitializedPtr); |
239 | Builder.CreateCondBr(Cond: HasRunBefore, True: EarlyReturn, False: InitBB); |
240 | Builder.SetInsertPoint(EarlyReturn); |
241 | Builder.CreateRetVoid(); |
242 | |
243 | // Keep track that this function has been run once. |
244 | Builder.SetInsertPoint(InitBB); |
245 | Value *True = Builder.getInt1(V: true); |
246 | Builder.CreateStore(Val: True, Ptr: AlreadyInitializedPtr); |
247 | |
248 | // Register the final reporting function with atexit(). |
249 | Value *FinalReportingPtr = |
250 | Builder.CreatePointerCast(V: FinalReporting, DestTy: Builder.getPtrTy()); |
251 | Function *AtExitFn = getAtExit(); |
252 | Builder.CreateCall(Callee: AtExitFn, Args: {FinalReportingPtr}); |
253 | |
254 | if (Supported) { |
255 | // Read the currently cycle counter and store the result for later. |
256 | Function *RDTSCPFn = getRDTSCP(); |
257 | Value *CurrentCycles = |
258 | Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0}); |
259 | Builder.CreateStore(Val: CurrentCycles, Ptr: CyclesTotalStartPtr, isVolatile: true); |
260 | } |
261 | Builder.CreateRetVoid(); |
262 | |
263 | return InitFn; |
264 | } |
265 | |
266 | void PerfMonitor::insertRegionStart(Instruction *InsertBefore) { |
267 | if (!Supported) |
268 | return; |
269 | |
270 | Builder.SetInsertPoint(InsertBefore->getIterator()); |
271 | Function *RDTSCPFn = getRDTSCP(); |
272 | Value *CurrentCycles = |
273 | Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0}); |
274 | Builder.CreateStore(Val: CurrentCycles, Ptr: CyclesInScopStartPtr, isVolatile: true); |
275 | } |
276 | |
277 | void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) { |
278 | if (!Supported) |
279 | return; |
280 | |
281 | Builder.SetInsertPoint(InsertBefore->getIterator()); |
282 | Function *RDTSCPFn = getRDTSCP(); |
283 | Type *Int64Ty = Builder.getInt64Ty(); |
284 | LoadInst *CyclesStart = |
285 | Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInScopStartPtr, isVolatile: true); |
286 | Value *CurrentCycles = |
287 | Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0}); |
288 | Value *CyclesInScop = Builder.CreateSub(LHS: CurrentCycles, RHS: CyclesStart); |
289 | Value *CyclesInScops = Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInScopsPtr, isVolatile: true); |
290 | CyclesInScops = Builder.CreateAdd(LHS: CyclesInScops, RHS: CyclesInScop); |
291 | Builder.CreateStore(Val: CyclesInScops, Ptr: CyclesInScopsPtr, isVolatile: true); |
292 | |
293 | Value *CyclesInCurrentScop = |
294 | Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInCurrentScopPtr, isVolatile: true); |
295 | CyclesInCurrentScop = Builder.CreateAdd(LHS: CyclesInCurrentScop, RHS: CyclesInScop); |
296 | Builder.CreateStore(Val: CyclesInCurrentScop, Ptr: CyclesInCurrentScopPtr, isVolatile: true); |
297 | |
298 | Value *TripCountForCurrentScop = |
299 | Builder.CreateLoad(Ty: Int64Ty, Ptr: TripCountForCurrentScopPtr, isVolatile: true); |
300 | TripCountForCurrentScop = |
301 | Builder.CreateAdd(LHS: TripCountForCurrentScop, RHS: Builder.getInt64(C: 1)); |
302 | Builder.CreateStore(Val: TripCountForCurrentScop, Ptr: TripCountForCurrentScopPtr, |
303 | isVolatile: true); |
304 | } |
305 | |