1//===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10
11#include "polly/CodeGen/PerfMonitor.h"
12#include "polly/CodeGen/RuntimeDebugBuilder.h"
13#include "polly/ScopInfo.h"
14#include "llvm/ADT/Twine.h"
15#include "llvm/IR/IntrinsicsX86.h"
16#include "llvm/IR/Module.h"
17#include "llvm/TargetParser/Triple.h"
18
19using namespace llvm;
20using namespace polly;
21
22Function *PerfMonitor::getAtExit() {
23 const char *Name = "atexit";
24 Function *F = M->getFunction(Name);
25
26 if (!F) {
27 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
28 FunctionType *Ty =
29 FunctionType::get(Result: Builder.getInt32Ty(), Params: {Builder.getPtrTy()}, isVarArg: false);
30 F = Function::Create(Ty, Linkage, N: Name, M);
31 }
32
33 return F;
34}
35
36void PerfMonitor::addToGlobalConstructors(Function *Fn) {
37 const char *Name = "llvm.global_ctors";
38 GlobalVariable *GV = M->getGlobalVariable(Name);
39 std::vector<Constant *> V;
40
41 if (GV) {
42 Constant *Array = GV->getInitializer();
43 for (Value *X : Array->operand_values())
44 V.push_back(x: cast<Constant>(Val: X));
45 GV->eraseFromParent();
46 }
47
48 StructType *ST =
49 StructType::get(elt1: Builder.getInt32Ty(), elts: Fn->getType(), elts: Builder.getPtrTy());
50
51 V.push_back(
52 x: ConstantStruct::get(T: ST, Vs: Builder.getInt32(C: 10), Vs: Fn,
53 Vs: ConstantPointerNull::get(T: Builder.getPtrTy())));
54 ArrayType *Ty = ArrayType::get(ElementType: ST, NumElements: V.size());
55
56 GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage,
57 ConstantArray::get(T: Ty, V), Name, nullptr,
58 GlobalVariable::NotThreadLocal);
59}
60
61Function *PerfMonitor::getRDTSCP() {
62 return Intrinsic::getOrInsertDeclaration(M, Intrinsic::id: x86_rdtscp);
63}
64
65PerfMonitor::PerfMonitor(const Scop &S, Module *M)
66 : M(M), Builder(M->getContext()), S(S) {
67 if (M->getTargetTriple().getArch() == llvm::Triple::x86_64)
68 Supported = true;
69 else
70 Supported = false;
71}
72
73static void TryRegisterGlobal(Module *M, const char *Name,
74 Constant *InitialValue, Value **Location) {
75 *Location = M->getGlobalVariable(Name);
76
77 if (!*Location)
78 *Location = new GlobalVariable(
79 *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,
80 InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel);
81}
82
83// Generate a unique name that is usable as a LLVM name for a scop to name its
84// performance counter.
85static std::string GetScopUniqueVarname(const Scop &S) {
86 std::string EntryString, ExitString;
87 std::tie(args&: EntryString, args&: ExitString) = S.getEntryExitStr();
88
89 return (Twine("__polly_perf_in_") + S.getFunction().getName() + "_from__" +
90 EntryString + "__to__" + ExitString)
91 .str();
92}
93
94void PerfMonitor::addScopCounter() {
95 const std::string varname = GetScopUniqueVarname(S);
96 TryRegisterGlobal(M, Name: (varname + "_cycles").c_str(), InitialValue: Builder.getInt64(C: 0),
97 Location: &CyclesInCurrentScopPtr);
98
99 TryRegisterGlobal(M, Name: (varname + "_trip_count").c_str(), InitialValue: Builder.getInt64(C: 0),
100 Location: &TripCountForCurrentScopPtr);
101}
102
103void PerfMonitor::addGlobalVariables() {
104 TryRegisterGlobal(M, Name: "__polly_perf_cycles_total_start", InitialValue: Builder.getInt64(C: 0),
105 Location: &CyclesTotalStartPtr);
106
107 TryRegisterGlobal(M, Name: "__polly_perf_initialized", InitialValue: Builder.getInt1(V: false),
108 Location: &AlreadyInitializedPtr);
109
110 TryRegisterGlobal(M, Name: "__polly_perf_cycles_in_scops", InitialValue: Builder.getInt64(C: 0),
111 Location: &CyclesInScopsPtr);
112
113 TryRegisterGlobal(M, Name: "__polly_perf_cycles_in_scop_start", InitialValue: Builder.getInt64(C: 0),
114 Location: &CyclesInScopStartPtr);
115}
116
// Names of the two functions this file emits into the instrumented module.
// The pointers themselves are const so the names cannot be re-pointed at
// run time.
static const char *const InitFunctionName = "__polly_perf_init";
static const char *const FinalReportingFunctionName = "__polly_perf_final";
119
120static BasicBlock *FinalStartBB = nullptr;
121static ReturnInst *ReturnFromFinal = nullptr;
122
123Function *PerfMonitor::insertFinalReporting() {
124 // Create new function.
125 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
126 FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params: {}, isVarArg: false);
127 Function *ExitFn =
128 Function::Create(Ty, Linkage, N: FinalReportingFunctionName, M);
129 FinalStartBB = BasicBlock::Create(Context&: M->getContext(), Name: "start", Parent: ExitFn);
130 Builder.SetInsertPoint(FinalStartBB);
131
132 if (!Supported) {
133 RuntimeDebugBuilder::createCPUPrinter(
134 Builder, args: "Polly runtime information generation not supported\n");
135 Builder.CreateRetVoid();
136 return ExitFn;
137 }
138
139 // Measure current cycles and compute final timings.
140 Function *RDTSCPFn = getRDTSCP();
141
142 Type *Int64Ty = Builder.getInt64Ty();
143 Value *CurrentCycles =
144 Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0});
145 Value *CyclesStart = Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesTotalStartPtr, isVolatile: true);
146 Value *CyclesTotal = Builder.CreateSub(LHS: CurrentCycles, RHS: CyclesStart);
147 Value *CyclesInScops = Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInScopsPtr, isVolatile: true);
148
149 // Print the runtime information.
150 RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Polly runtime information\n");
151 RuntimeDebugBuilder::createCPUPrinter(Builder, args: "-------------------------\n");
152 RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Total: ", args: CyclesTotal, args: "\n");
153 RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Scops: ", args: CyclesInScops,
154 args: "\n");
155
156 // Print the preamble for per-scop information.
157 RuntimeDebugBuilder::createCPUPrinter(Builder, args: "\n");
158 RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Per SCoP information\n");
159 RuntimeDebugBuilder::createCPUPrinter(Builder, args: "--------------------\n");
160
161 RuntimeDebugBuilder::createCPUPrinter(
162 Builder, args: "scop function, "
163 "entry block name, exit block name, total time, trip count\n");
164 ReturnFromFinal = Builder.CreateRetVoid();
165 return ExitFn;
166}
167
168void PerfMonitor::AppendScopReporting() {
169 if (!Supported)
170 return;
171
172 assert(FinalStartBB && "Expected FinalStartBB to be initialized by "
173 "PerfMonitor::insertFinalReporting.");
174 assert(ReturnFromFinal && "Expected ReturnFromFinal to be initialized by "
175 "PerfMonitor::insertFinalReporting.");
176
177 Builder.SetInsertPoint(FinalStartBB);
178 ReturnFromFinal->eraseFromParent();
179
180 Type *Int64Ty = Builder.getInt64Ty();
181 Value *CyclesInCurrentScop =
182 Builder.CreateLoad(Ty: Int64Ty, Ptr: this->CyclesInCurrentScopPtr, isVolatile: true);
183
184 Value *TripCountForCurrentScop =
185 Builder.CreateLoad(Ty: Int64Ty, Ptr: this->TripCountForCurrentScopPtr, isVolatile: true);
186
187 std::string EntryName, ExitName;
188 std::tie(args&: EntryName, args&: ExitName) = S.getEntryExitStr();
189
190 // print in CSV for easy parsing with other tools.
191 RuntimeDebugBuilder::createCPUPrinter(
192 Builder, args: S.getFunction().getName(), args: ", ", args: EntryName, args: ", ", args: ExitName, args: ", ",
193 args: CyclesInCurrentScop, args: ", ", args: TripCountForCurrentScop, args: "\n");
194
195 ReturnFromFinal = Builder.CreateRetVoid();
196}
197
198static Function *FinalReporting = nullptr;
199
200void PerfMonitor::initialize() {
201 addGlobalVariables();
202 addScopCounter();
203
204 // Ensure that we only add the final reporting function once.
205 // On later invocations, append to the reporting function.
206 if (!FinalReporting) {
207 FinalReporting = insertFinalReporting();
208
209 Function *InitFn = insertInitFunction(FinalReporting);
210 addToGlobalConstructors(Fn: InitFn);
211 }
212
213 AppendScopReporting();
214}
215
216Function *PerfMonitor::insertInitFunction(Function *FinalReporting) {
217 // Insert function definition and BBs.
218 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
219 FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params: {}, isVarArg: false);
220 Function *InitFn = Function::Create(Ty, Linkage, N: InitFunctionName, M);
221 BasicBlock *Start = BasicBlock::Create(Context&: M->getContext(), Name: "start", Parent: InitFn);
222 BasicBlock *EarlyReturn =
223 BasicBlock::Create(Context&: M->getContext(), Name: "earlyreturn", Parent: InitFn);
224 BasicBlock *InitBB = BasicBlock::Create(Context&: M->getContext(), Name: "initbb", Parent: InitFn);
225
226 Builder.SetInsertPoint(Start);
227
228 // Check if this function was already run. If yes, return.
229 //
230 // In case profiling has been enabled in multiple translation units, the
231 // initializer function will be added to the global constructors list of
232 // each translation unit. When merging translation units, the global
233 // constructor lists are just appended, such that the initializer will appear
234 // multiple times. To avoid initializations being run multiple times (and
235 // especially to avoid that atExitFn is called more than once), we bail
236 // out if the initializer is run more than once.
237 Value *HasRunBefore =
238 Builder.CreateLoad(Ty: Builder.getInt1Ty(), Ptr: AlreadyInitializedPtr);
239 Builder.CreateCondBr(Cond: HasRunBefore, True: EarlyReturn, False: InitBB);
240 Builder.SetInsertPoint(EarlyReturn);
241 Builder.CreateRetVoid();
242
243 // Keep track that this function has been run once.
244 Builder.SetInsertPoint(InitBB);
245 Value *True = Builder.getInt1(V: true);
246 Builder.CreateStore(Val: True, Ptr: AlreadyInitializedPtr);
247
248 // Register the final reporting function with atexit().
249 Value *FinalReportingPtr =
250 Builder.CreatePointerCast(V: FinalReporting, DestTy: Builder.getPtrTy());
251 Function *AtExitFn = getAtExit();
252 Builder.CreateCall(Callee: AtExitFn, Args: {FinalReportingPtr});
253
254 if (Supported) {
255 // Read the currently cycle counter and store the result for later.
256 Function *RDTSCPFn = getRDTSCP();
257 Value *CurrentCycles =
258 Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0});
259 Builder.CreateStore(Val: CurrentCycles, Ptr: CyclesTotalStartPtr, isVolatile: true);
260 }
261 Builder.CreateRetVoid();
262
263 return InitFn;
264}
265
266void PerfMonitor::insertRegionStart(Instruction *InsertBefore) {
267 if (!Supported)
268 return;
269
270 Builder.SetInsertPoint(InsertBefore->getIterator());
271 Function *RDTSCPFn = getRDTSCP();
272 Value *CurrentCycles =
273 Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0});
274 Builder.CreateStore(Val: CurrentCycles, Ptr: CyclesInScopStartPtr, isVolatile: true);
275}
276
277void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
278 if (!Supported)
279 return;
280
281 Builder.SetInsertPoint(InsertBefore->getIterator());
282 Function *RDTSCPFn = getRDTSCP();
283 Type *Int64Ty = Builder.getInt64Ty();
284 LoadInst *CyclesStart =
285 Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInScopStartPtr, isVolatile: true);
286 Value *CurrentCycles =
287 Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0});
288 Value *CyclesInScop = Builder.CreateSub(LHS: CurrentCycles, RHS: CyclesStart);
289 Value *CyclesInScops = Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInScopsPtr, isVolatile: true);
290 CyclesInScops = Builder.CreateAdd(LHS: CyclesInScops, RHS: CyclesInScop);
291 Builder.CreateStore(Val: CyclesInScops, Ptr: CyclesInScopsPtr, isVolatile: true);
292
293 Value *CyclesInCurrentScop =
294 Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInCurrentScopPtr, isVolatile: true);
295 CyclesInCurrentScop = Builder.CreateAdd(LHS: CyclesInCurrentScop, RHS: CyclesInScop);
296 Builder.CreateStore(Val: CyclesInCurrentScop, Ptr: CyclesInCurrentScopPtr, isVolatile: true);
297
298 Value *TripCountForCurrentScop =
299 Builder.CreateLoad(Ty: Int64Ty, Ptr: TripCountForCurrentScopPtr, isVolatile: true);
300 TripCountForCurrentScop =
301 Builder.CreateAdd(LHS: TripCountForCurrentScop, RHS: Builder.getInt64(C: 1));
302 Builder.CreateStore(Val: TripCountForCurrentScop, Ptr: TripCountForCurrentScopPtr,
303 isVolatile: true);
304}
305

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of polly/lib/CodeGen/PerfMonitor.cpp