| 1 | //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | //===----------------------------------------------------------------------===// |
| 10 | |
| 11 | #include "polly/CodeGen/PerfMonitor.h" |
| 12 | #include "polly/CodeGen/RuntimeDebugBuilder.h" |
| 13 | #include "polly/ScopInfo.h" |
| 14 | #include "llvm/ADT/Twine.h" |
| 15 | #include "llvm/IR/IntrinsicsX86.h" |
| 16 | #include "llvm/IR/Module.h" |
| 17 | #include "llvm/TargetParser/Triple.h" |
| 18 | |
| 19 | using namespace llvm; |
| 20 | using namespace polly; |
| 21 | |
| 22 | Function *PerfMonitor::getAtExit() { |
| 23 | const char *Name = "atexit" ; |
| 24 | Function *F = M->getFunction(Name); |
| 25 | |
| 26 | if (!F) { |
| 27 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
| 28 | FunctionType *Ty = |
| 29 | FunctionType::get(Result: Builder.getInt32Ty(), Params: {Builder.getPtrTy()}, isVarArg: false); |
| 30 | F = Function::Create(Ty, Linkage, N: Name, M); |
| 31 | } |
| 32 | |
| 33 | return F; |
| 34 | } |
| 35 | |
| 36 | void PerfMonitor::addToGlobalConstructors(Function *Fn) { |
| 37 | const char *Name = "llvm.global_ctors" ; |
| 38 | GlobalVariable *GV = M->getGlobalVariable(Name); |
| 39 | std::vector<Constant *> V; |
| 40 | |
| 41 | if (GV) { |
| 42 | Constant *Array = GV->getInitializer(); |
| 43 | for (Value *X : Array->operand_values()) |
| 44 | V.push_back(x: cast<Constant>(Val: X)); |
| 45 | GV->eraseFromParent(); |
| 46 | } |
| 47 | |
| 48 | StructType *ST = |
| 49 | StructType::get(elt1: Builder.getInt32Ty(), elts: Fn->getType(), elts: Builder.getPtrTy()); |
| 50 | |
| 51 | V.push_back( |
| 52 | x: ConstantStruct::get(T: ST, Vs: Builder.getInt32(C: 10), Vs: Fn, |
| 53 | Vs: ConstantPointerNull::get(T: Builder.getPtrTy()))); |
| 54 | ArrayType *Ty = ArrayType::get(ElementType: ST, NumElements: V.size()); |
| 55 | |
| 56 | GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage, |
| 57 | ConstantArray::get(T: Ty, V), Name, nullptr, |
| 58 | GlobalVariable::NotThreadLocal); |
| 59 | } |
| 60 | |
| 61 | Function *PerfMonitor::getRDTSCP() { |
| 62 | return Intrinsic::getOrInsertDeclaration(M, Intrinsic::id: x86_rdtscp); |
| 63 | } |
| 64 | |
| 65 | PerfMonitor::PerfMonitor(const Scop &S, Module *M) |
| 66 | : M(M), Builder(M->getContext()), S(S) { |
| 67 | if (M->getTargetTriple().getArch() == llvm::Triple::x86_64) |
| 68 | Supported = true; |
| 69 | else |
| 70 | Supported = false; |
| 71 | } |
| 72 | |
| 73 | static void TryRegisterGlobal(Module *M, const char *Name, |
| 74 | Constant *InitialValue, Value **Location) { |
| 75 | *Location = M->getGlobalVariable(Name); |
| 76 | |
| 77 | if (!*Location) |
| 78 | *Location = new GlobalVariable( |
| 79 | *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage, |
| 80 | InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel); |
| 81 | } |
| 82 | |
| 83 | // Generate a unique name that is usable as a LLVM name for a scop to name its |
| 84 | // performance counter. |
| 85 | static std::string GetScopUniqueVarname(const Scop &S) { |
| 86 | std::string EntryString, ExitString; |
| 87 | std::tie(args&: EntryString, args&: ExitString) = S.getEntryExitStr(); |
| 88 | |
| 89 | return (Twine("__polly_perf_in_" ) + S.getFunction().getName() + "_from__" + |
| 90 | EntryString + "__to__" + ExitString) |
| 91 | .str(); |
| 92 | } |
| 93 | |
| 94 | void PerfMonitor::addScopCounter() { |
| 95 | const std::string varname = GetScopUniqueVarname(S); |
| 96 | TryRegisterGlobal(M, Name: (varname + "_cycles" ).c_str(), InitialValue: Builder.getInt64(C: 0), |
| 97 | Location: &CyclesInCurrentScopPtr); |
| 98 | |
| 99 | TryRegisterGlobal(M, Name: (varname + "_trip_count" ).c_str(), InitialValue: Builder.getInt64(C: 0), |
| 100 | Location: &TripCountForCurrentScopPtr); |
| 101 | } |
| 102 | |
| 103 | void PerfMonitor::addGlobalVariables() { |
| 104 | TryRegisterGlobal(M, Name: "__polly_perf_cycles_total_start" , InitialValue: Builder.getInt64(C: 0), |
| 105 | Location: &CyclesTotalStartPtr); |
| 106 | |
| 107 | TryRegisterGlobal(M, Name: "__polly_perf_initialized" , InitialValue: Builder.getInt1(V: false), |
| 108 | Location: &AlreadyInitializedPtr); |
| 109 | |
| 110 | TryRegisterGlobal(M, Name: "__polly_perf_cycles_in_scops" , InitialValue: Builder.getInt64(C: 0), |
| 111 | Location: &CyclesInScopsPtr); |
| 112 | |
| 113 | TryRegisterGlobal(M, Name: "__polly_perf_cycles_in_scop_start" , InitialValue: Builder.getInt64(C: 0), |
| 114 | Location: &CyclesInScopStartPtr); |
| 115 | } |
| 116 | |
| 117 | static const char *InitFunctionName = "__polly_perf_init" ; |
| 118 | static const char *FinalReportingFunctionName = "__polly_perf_final" ; |
| 119 | |
| 120 | static BasicBlock *FinalStartBB = nullptr; |
| 121 | static ReturnInst *ReturnFromFinal = nullptr; |
| 122 | |
| 123 | Function *PerfMonitor::insertFinalReporting() { |
| 124 | // Create new function. |
| 125 | GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage; |
| 126 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params: {}, isVarArg: false); |
| 127 | Function *ExitFn = |
| 128 | Function::Create(Ty, Linkage, N: FinalReportingFunctionName, M); |
| 129 | FinalStartBB = BasicBlock::Create(Context&: M->getContext(), Name: "start" , Parent: ExitFn); |
| 130 | Builder.SetInsertPoint(FinalStartBB); |
| 131 | |
| 132 | if (!Supported) { |
| 133 | RuntimeDebugBuilder::createCPUPrinter( |
| 134 | Builder, args: "Polly runtime information generation not supported\n" ); |
| 135 | Builder.CreateRetVoid(); |
| 136 | return ExitFn; |
| 137 | } |
| 138 | |
| 139 | // Measure current cycles and compute final timings. |
| 140 | Function *RDTSCPFn = getRDTSCP(); |
| 141 | |
| 142 | Type *Int64Ty = Builder.getInt64Ty(); |
| 143 | Value *CurrentCycles = |
| 144 | Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0}); |
| 145 | Value *CyclesStart = Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesTotalStartPtr, isVolatile: true); |
| 146 | Value *CyclesTotal = Builder.CreateSub(LHS: CurrentCycles, RHS: CyclesStart); |
| 147 | Value *CyclesInScops = Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInScopsPtr, isVolatile: true); |
| 148 | |
| 149 | // Print the runtime information. |
| 150 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Polly runtime information\n" ); |
| 151 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "-------------------------\n" ); |
| 152 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Total: " , args: CyclesTotal, args: "\n" ); |
| 153 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Scops: " , args: CyclesInScops, |
| 154 | args: "\n" ); |
| 155 | |
| 156 | // Print the preamble for per-scop information. |
| 157 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "\n" ); |
| 158 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "Per SCoP information\n" ); |
| 159 | RuntimeDebugBuilder::createCPUPrinter(Builder, args: "--------------------\n" ); |
| 160 | |
| 161 | RuntimeDebugBuilder::createCPUPrinter( |
| 162 | Builder, args: "scop function, " |
| 163 | "entry block name, exit block name, total time, trip count\n" ); |
| 164 | ReturnFromFinal = Builder.CreateRetVoid(); |
| 165 | return ExitFn; |
| 166 | } |
| 167 | |
| 168 | void PerfMonitor::AppendScopReporting() { |
| 169 | if (!Supported) |
| 170 | return; |
| 171 | |
| 172 | assert(FinalStartBB && "Expected FinalStartBB to be initialized by " |
| 173 | "PerfMonitor::insertFinalReporting." ); |
| 174 | assert(ReturnFromFinal && "Expected ReturnFromFinal to be initialized by " |
| 175 | "PerfMonitor::insertFinalReporting." ); |
| 176 | |
| 177 | Builder.SetInsertPoint(FinalStartBB); |
| 178 | ReturnFromFinal->eraseFromParent(); |
| 179 | |
| 180 | Type *Int64Ty = Builder.getInt64Ty(); |
| 181 | Value *CyclesInCurrentScop = |
| 182 | Builder.CreateLoad(Ty: Int64Ty, Ptr: this->CyclesInCurrentScopPtr, isVolatile: true); |
| 183 | |
| 184 | Value *TripCountForCurrentScop = |
| 185 | Builder.CreateLoad(Ty: Int64Ty, Ptr: this->TripCountForCurrentScopPtr, isVolatile: true); |
| 186 | |
| 187 | std::string EntryName, ExitName; |
| 188 | std::tie(args&: EntryName, args&: ExitName) = S.getEntryExitStr(); |
| 189 | |
| 190 | // print in CSV for easy parsing with other tools. |
| 191 | RuntimeDebugBuilder::createCPUPrinter( |
| 192 | Builder, args: S.getFunction().getName(), args: ", " , args: EntryName, args: ", " , args: ExitName, args: ", " , |
| 193 | args: CyclesInCurrentScop, args: ", " , args: TripCountForCurrentScop, args: "\n" ); |
| 194 | |
| 195 | ReturnFromFinal = Builder.CreateRetVoid(); |
| 196 | } |
| 197 | |
| 198 | static Function *FinalReporting = nullptr; |
| 199 | |
| 200 | void PerfMonitor::initialize() { |
| 201 | addGlobalVariables(); |
| 202 | addScopCounter(); |
| 203 | |
| 204 | // Ensure that we only add the final reporting function once. |
| 205 | // On later invocations, append to the reporting function. |
| 206 | if (!FinalReporting) { |
| 207 | FinalReporting = insertFinalReporting(); |
| 208 | |
| 209 | Function *InitFn = insertInitFunction(FinalReporting); |
| 210 | addToGlobalConstructors(Fn: InitFn); |
| 211 | } |
| 212 | |
| 213 | AppendScopReporting(); |
| 214 | } |
| 215 | |
| 216 | Function *PerfMonitor::insertInitFunction(Function *FinalReporting) { |
| 217 | // Insert function definition and BBs. |
| 218 | GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage; |
| 219 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params: {}, isVarArg: false); |
| 220 | Function *InitFn = Function::Create(Ty, Linkage, N: InitFunctionName, M); |
| 221 | BasicBlock *Start = BasicBlock::Create(Context&: M->getContext(), Name: "start" , Parent: InitFn); |
| 222 | BasicBlock *EarlyReturn = |
| 223 | BasicBlock::Create(Context&: M->getContext(), Name: "earlyreturn" , Parent: InitFn); |
| 224 | BasicBlock *InitBB = BasicBlock::Create(Context&: M->getContext(), Name: "initbb" , Parent: InitFn); |
| 225 | |
| 226 | Builder.SetInsertPoint(Start); |
| 227 | |
| 228 | // Check if this function was already run. If yes, return. |
| 229 | // |
| 230 | // In case profiling has been enabled in multiple translation units, the |
| 231 | // initializer function will be added to the global constructors list of |
| 232 | // each translation unit. When merging translation units, the global |
| 233 | // constructor lists are just appended, such that the initializer will appear |
| 234 | // multiple times. To avoid initializations being run multiple times (and |
| 235 | // especially to avoid that atExitFn is called more than once), we bail |
| 236 | // out if the initializer is run more than once. |
| 237 | Value *HasRunBefore = |
| 238 | Builder.CreateLoad(Ty: Builder.getInt1Ty(), Ptr: AlreadyInitializedPtr); |
| 239 | Builder.CreateCondBr(Cond: HasRunBefore, True: EarlyReturn, False: InitBB); |
| 240 | Builder.SetInsertPoint(EarlyReturn); |
| 241 | Builder.CreateRetVoid(); |
| 242 | |
| 243 | // Keep track that this function has been run once. |
| 244 | Builder.SetInsertPoint(InitBB); |
| 245 | Value *True = Builder.getInt1(V: true); |
| 246 | Builder.CreateStore(Val: True, Ptr: AlreadyInitializedPtr); |
| 247 | |
| 248 | // Register the final reporting function with atexit(). |
| 249 | Value *FinalReportingPtr = |
| 250 | Builder.CreatePointerCast(V: FinalReporting, DestTy: Builder.getPtrTy()); |
| 251 | Function *AtExitFn = getAtExit(); |
| 252 | Builder.CreateCall(Callee: AtExitFn, Args: {FinalReportingPtr}); |
| 253 | |
| 254 | if (Supported) { |
| 255 | // Read the currently cycle counter and store the result for later. |
| 256 | Function *RDTSCPFn = getRDTSCP(); |
| 257 | Value *CurrentCycles = |
| 258 | Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0}); |
| 259 | Builder.CreateStore(Val: CurrentCycles, Ptr: CyclesTotalStartPtr, isVolatile: true); |
| 260 | } |
| 261 | Builder.CreateRetVoid(); |
| 262 | |
| 263 | return InitFn; |
| 264 | } |
| 265 | |
| 266 | void PerfMonitor::insertRegionStart(Instruction *InsertBefore) { |
| 267 | if (!Supported) |
| 268 | return; |
| 269 | |
| 270 | Builder.SetInsertPoint(InsertBefore->getIterator()); |
| 271 | Function *RDTSCPFn = getRDTSCP(); |
| 272 | Value *CurrentCycles = |
| 273 | Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0}); |
| 274 | Builder.CreateStore(Val: CurrentCycles, Ptr: CyclesInScopStartPtr, isVolatile: true); |
| 275 | } |
| 276 | |
| 277 | void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) { |
| 278 | if (!Supported) |
| 279 | return; |
| 280 | |
| 281 | Builder.SetInsertPoint(InsertBefore->getIterator()); |
| 282 | Function *RDTSCPFn = getRDTSCP(); |
| 283 | Type *Int64Ty = Builder.getInt64Ty(); |
| 284 | LoadInst *CyclesStart = |
| 285 | Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInScopStartPtr, isVolatile: true); |
| 286 | Value *CurrentCycles = |
| 287 | Builder.CreateExtractValue(Agg: Builder.CreateCall(Callee: RDTSCPFn), Idxs: {0}); |
| 288 | Value *CyclesInScop = Builder.CreateSub(LHS: CurrentCycles, RHS: CyclesStart); |
| 289 | Value *CyclesInScops = Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInScopsPtr, isVolatile: true); |
| 290 | CyclesInScops = Builder.CreateAdd(LHS: CyclesInScops, RHS: CyclesInScop); |
| 291 | Builder.CreateStore(Val: CyclesInScops, Ptr: CyclesInScopsPtr, isVolatile: true); |
| 292 | |
| 293 | Value *CyclesInCurrentScop = |
| 294 | Builder.CreateLoad(Ty: Int64Ty, Ptr: CyclesInCurrentScopPtr, isVolatile: true); |
| 295 | CyclesInCurrentScop = Builder.CreateAdd(LHS: CyclesInCurrentScop, RHS: CyclesInScop); |
| 296 | Builder.CreateStore(Val: CyclesInCurrentScop, Ptr: CyclesInCurrentScopPtr, isVolatile: true); |
| 297 | |
| 298 | Value *TripCountForCurrentScop = |
| 299 | Builder.CreateLoad(Ty: Int64Ty, Ptr: TripCountForCurrentScopPtr, isVolatile: true); |
| 300 | TripCountForCurrentScop = |
| 301 | Builder.CreateAdd(LHS: TripCountForCurrentScop, RHS: Builder.getInt64(C: 1)); |
| 302 | Builder.CreateStore(Val: TripCountForCurrentScop, Ptr: TripCountForCurrentScopPtr, |
| 303 | isVolatile: true); |
| 304 | } |
| 305 | |