1//===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10
11#include "polly/CodeGen/PerfMonitor.h"
12#include "polly/CodeGen/RuntimeDebugBuilder.h"
13#include "polly/ScopInfo.h"
14#include "llvm/ADT/Twine.h"
15#include "llvm/IR/IntrinsicsX86.h"
16#include "llvm/TargetParser/Triple.h"
17
18using namespace llvm;
19using namespace polly;
20
21Function *PerfMonitor::getAtExit() {
22 const char *Name = "atexit";
23 Function *F = M->getFunction(Name);
24
25 if (!F) {
26 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
27 FunctionType *Ty =
28 FunctionType::get(Result: Builder.getInt32Ty(), Params: {Builder.getPtrTy()}, isVarArg: false);
29 F = Function::Create(Ty, Linkage, N: Name, M);
30 }
31
32 return F;
33}
34
35void PerfMonitor::addToGlobalConstructors(Function *Fn) {
36 const char *Name = "llvm.global_ctors";
37 GlobalVariable *GV = M->getGlobalVariable(Name);
38 std::vector<Constant *> V;
39
40 if (GV) {
41 Constant *Array = GV->getInitializer();
42 for (Value *X : Array->operand_values())
43 V.push_back(x: cast<Constant>(Val: X));
44 GV->eraseFromParent();
45 }
46
47 StructType *ST =
48 StructType::get(elt1: Builder.getInt32Ty(), elts: Fn->getType(), elts: Builder.getPtrTy());
49
50 V.push_back(
51 x: ConstantStruct::get(T: ST, Vs: Builder.getInt32(C: 10), Vs: Fn,
52 Vs: ConstantPointerNull::get(T: Builder.getPtrTy())));
53 ArrayType *Ty = ArrayType::get(ElementType: ST, NumElements: V.size());
54
55 GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage,
56 ConstantArray::get(T: Ty, V), Name, nullptr,
57 GlobalVariable::NotThreadLocal);
58}
59
60Function *PerfMonitor::getRDTSCP() {
61 return Intrinsic::getDeclaration(M, Intrinsic::id: x86_rdtscp);
62}
63
64PerfMonitor::PerfMonitor(const Scop &S, Module *M)
65 : M(M), Builder(M->getContext()), S(S) {
66 if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64)
67 Supported = true;
68 else
69 Supported = false;
70}
71
72static void TryRegisterGlobal(Module *M, const char *Name,
73 Constant *InitialValue, Value **Location) {
74 *Location = M->getGlobalVariable(Name);
75
76 if (!*Location)
77 *Location = new GlobalVariable(
78 *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,
79 InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel);
80}
81
82// Generate a unique name that is usable as a LLVM name for a scop to name its
83// performance counter.
84static std::string GetScopUniqueVarname(const Scop &S) {
85 std::string EntryString, ExitString;
86 std::tie(args&: EntryString, args&: ExitString) = S.getEntryExitStr();
87
88 return (Twine("__polly_perf_in_") + S.getFunction().getName() + "_from__" +
89 EntryString + "__to__" + ExitString)
90 .str();
91}
92
93void PerfMonitor::addScopCounter() {
94 const std::string varname = GetScopUniqueVarname(S);
95 TryRegisterGlobal(M, Name: (varname + "_cycles").c_str(), InitialValue: Builder.getInt64(C: 0),
96 Location: &CyclesInCurrentScopPtr);
97
98 TryRegisterGlobal(M, Name: (varname + "_trip_count").c_str(), InitialValue: Builder.getInt64(C: 0),
99 Location: &TripCountForCurrentScopPtr);
100}
101
102void PerfMonitor::addGlobalVariables() {
103 TryRegisterGlobal(M, Name: "__polly_perf_cycles_total_start", InitialValue: Builder.getInt64(C: 0),
104 Location: &CyclesTotalStartPtr);
105
106 TryRegisterGlobal(M, Name: "__polly_perf_initialized", InitialValue: Builder.getInt1(V: false),
107 Location: &AlreadyInitializedPtr);
108
109 TryRegisterGlobal(M, Name: "__polly_perf_cycles_in_scops", InitialValue: Builder.getInt64(C: 0),
110 Location: &CyclesInScopsPtr);
111
112 TryRegisterGlobal(M, Name: "__polly_perf_cycles_in_scop_start", InitialValue: Builder.getInt64(C: 0),
113 Location: &CyclesInScopStartPtr);
114}
115
116static const char *InitFunctionName = "__polly_perf_init";
117static const char *FinalReportingFunctionName = "__polly_perf_final";
118
119static BasicBlock *FinalStartBB = nullptr;
120static ReturnInst *ReturnFromFinal = nullptr;
121
122Function *PerfMonitor::insertFinalReporting() {
123 // Create new function.
124 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
125 FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params: {}, isVarArg: false);
126 Function *ExitFn =
127 Function::Create(Ty, Linkage, N: FinalReportingFunctionName, M);
128 FinalStartBB = BasicBlock::Create(Context&: M->getContext(), Name: "start", Parent: ExitFn);
129 Builder.SetInsertPoint(FinalStartBB);
130
131 if (!Supported) {
132 RuntimeDebugBuilder::createCPUPrinter(
133 Builder, args: "Polly runtime information generation not supported\n");
134 Builder.CreateRetVoid();
135 return ExitFn;
136 }
137
138 // Measure current cycles and compute final timings.
139 Function *RDTSCPFn = getRDTSCP();
140
141 Type *Int64Ty = Builder.getInt64Ty();
142 Value *CurrentCycles =
143 Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0});
144 Value *CyclesStart = Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesTotalStartPtr, isVolatile: true);
145 Value *CyclesTotal = Builder.CreateSub(LHS: CurrentCycles, RHS: CyclesStart);
146 Value *CyclesInScops = Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInScopsPtr, isVolatile: true);
147
148 // Print the runtime information.
149 RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Polly runtime information\n");
150 RuntimeDebugBuilder::createCPUPrinter(Builder, args: "-------------------------\n");
151 RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Total: ", args: CyclesTotal, args: "\n");
152 RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Scops: ", args: CyclesInScops,
153 args: "\n");
154
155 // Print the preamble for per-scop information.
156 RuntimeDebugBuilder::createCPUPrinter(Builder, args: "\n");
157 RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Per SCoP information\n");
158 RuntimeDebugBuilder::createCPUPrinter(Builder, args: "--------------------\n");
159
160 RuntimeDebugBuilder::createCPUPrinter(
161 Builder, args: "scop function, "
162 "entry block name, exit block name, total time, trip count\n");
163 ReturnFromFinal = Builder.CreateRetVoid();
164 return ExitFn;
165}
166
167void PerfMonitor::AppendScopReporting() {
168 if (!Supported)
169 return;
170
171 assert(FinalStartBB && "Expected FinalStartBB to be initialized by "
172 "PerfMonitor::insertFinalReporting.");
173 assert(ReturnFromFinal && "Expected ReturnFromFinal to be initialized by "
174 "PerfMonitor::insertFinalReporting.");
175
176 Builder.SetInsertPoint(FinalStartBB);
177 ReturnFromFinal->eraseFromParent();
178
179 Type *Int64Ty = Builder.getInt64Ty();
180 Value *CyclesInCurrentScop =
181 Builder.CreateLoad(Ty: Int64Ty, Ptr: this->CyclesInCurrentScopPtr, isVolatile: true);
182
183 Value *TripCountForCurrentScop =
184 Builder.CreateLoad(Ty: Int64Ty, Ptr: this->TripCountForCurrentScopPtr, isVolatile: true);
185
186 std::string EntryName, ExitName;
187 std::tie(args&: EntryName, args&: ExitName) = S.getEntryExitStr();
188
189 // print in CSV for easy parsing with other tools.
190 RuntimeDebugBuilder::createCPUPrinter(
191 Builder, args: S.getFunction().getName(), args: ", ", args: EntryName, args: ", ", args: ExitName, args: ", ",
192 args: CyclesInCurrentScop, args: ", ", args: TripCountForCurrentScop, args: "\n");
193
194 ReturnFromFinal = Builder.CreateRetVoid();
195}
196
197static Function *FinalReporting = nullptr;
198
199void PerfMonitor::initialize() {
200 addGlobalVariables();
201 addScopCounter();
202
203 // Ensure that we only add the final reporting function once.
204 // On later invocations, append to the reporting function.
205 if (!FinalReporting) {
206 FinalReporting = insertFinalReporting();
207
208 Function *InitFn = insertInitFunction(FinalReporting);
209 addToGlobalConstructors(Fn: InitFn);
210 }
211
212 AppendScopReporting();
213}
214
215Function *PerfMonitor::insertInitFunction(Function *FinalReporting) {
216 // Insert function definition and BBs.
217 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
218 FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params: {}, isVarArg: false);
219 Function *InitFn = Function::Create(Ty, Linkage, N: InitFunctionName, M);
220 BasicBlock *Start = BasicBlock::Create(Context&: M->getContext(), Name: "start", Parent: InitFn);
221 BasicBlock *EarlyReturn =
222 BasicBlock::Create(Context&: M->getContext(), Name: "earlyreturn", Parent: InitFn);
223 BasicBlock *InitBB = BasicBlock::Create(Context&: M->getContext(), Name: "initbb", Parent: InitFn);
224
225 Builder.SetInsertPoint(Start);
226
227 // Check if this function was already run. If yes, return.
228 //
229 // In case profiling has been enabled in multiple translation units, the
230 // initializer function will be added to the global constructors list of
231 // each translation unit. When merging translation units, the global
232 // constructor lists are just appended, such that the initializer will appear
233 // multiple times. To avoid initializations being run multiple times (and
234 // especially to avoid that atExitFn is called more than once), we bail
235 // out if the initializer is run more than once.
236 Value *HasRunBefore =
237 Builder.CreateLoad(Ty: Builder.getInt1Ty(), Ptr: AlreadyInitializedPtr);
238 Builder.CreateCondBr(Cond: HasRunBefore, True: EarlyReturn, False: InitBB);
239 Builder.SetInsertPoint(EarlyReturn);
240 Builder.CreateRetVoid();
241
242 // Keep track that this function has been run once.
243 Builder.SetInsertPoint(InitBB);
244 Value *True = Builder.getInt1(V: true);
245 Builder.CreateStore(Val: True, Ptr: AlreadyInitializedPtr);
246
247 // Register the final reporting function with atexit().
248 Value *FinalReportingPtr =
249 Builder.CreatePointerCast(V: FinalReporting, DestTy: Builder.getPtrTy());
250 Function *AtExitFn = getAtExit();
251 Builder.CreateCall(Callee: AtExitFn, Args: {FinalReportingPtr});
252
253 if (Supported) {
254 // Read the currently cycle counter and store the result for later.
255 Function *RDTSCPFn = getRDTSCP();
256 Value *CurrentCycles =
257 Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0});
258 Builder.CreateStore(Val: CurrentCycles, Ptr: CyclesTotalStartPtr, isVolatile: true);
259 }
260 Builder.CreateRetVoid();
261
262 return InitFn;
263}
264
265void PerfMonitor::insertRegionStart(Instruction *InsertBefore) {
266 if (!Supported)
267 return;
268
269 Builder.SetInsertPoint(InsertBefore);
270 Function *RDTSCPFn = getRDTSCP();
271 Value *CurrentCycles =
272 Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0});
273 Builder.CreateStore(Val: CurrentCycles, Ptr: CyclesInScopStartPtr, isVolatile: true);
274}
275
276void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
277 if (!Supported)
278 return;
279
280 Builder.SetInsertPoint(InsertBefore);
281 Function *RDTSCPFn = getRDTSCP();
282 Type *Int64Ty = Builder.getInt64Ty();
283 LoadInst *CyclesStart =
284 Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInScopStartPtr, isVolatile: true);
285 Value *CurrentCycles =
286 Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0});
287 Value *CyclesInScop = Builder.CreateSub(LHS: CurrentCycles, RHS: CyclesStart);
288 Value *CyclesInScops = Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInScopsPtr, isVolatile: true);
289 CyclesInScops = Builder.CreateAdd(LHS: CyclesInScops, RHS: CyclesInScop);
290 Builder.CreateStore(Val: CyclesInScops, Ptr: CyclesInScopsPtr, isVolatile: true);
291
292 Value *CyclesInCurrentScop =
293 Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInCurrentScopPtr, isVolatile: true);
294 CyclesInCurrentScop = Builder.CreateAdd(LHS: CyclesInCurrentScop, RHS: CyclesInScop);
295 Builder.CreateStore(Val: CyclesInCurrentScop, Ptr: CyclesInCurrentScopPtr, isVolatile: true);
296
297 Value *TripCountForCurrentScop =
298 Builder.CreateLoad(Ty: Int64Ty, Ptr: TripCountForCurrentScopPtr, isVolatile: true);
299 TripCountForCurrentScop =
300 Builder.CreateAdd(LHS: TripCountForCurrentScop, RHS: Builder.getInt64(C: 1));
301 Builder.CreateStore(Val: TripCountForCurrentScop, Ptr: TripCountForCurrentScopPtr,
302 isVolatile: true);
303}
304

source code of polly/lib/CodeGen/PerfMonitor.cpp