| 1 | //===- GlobalHandler.cpp - Target independent global & env. var handling --===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Target independent global handler and environment manager. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "GlobalHandler.h" |
| 14 | #include "PluginInterface.h" |
| 15 | #include "Utils/ELF.h" |
| 16 | |
| 17 | #include "Shared/Utils.h" |
| 18 | |
| 19 | #include "llvm/ProfileData/InstrProfData.inc" |
| 20 | #include "llvm/Support/Error.h" |
| 21 | |
| 22 | #include <cstring> |
| 23 | #include <string> |
| 24 | |
| 25 | using namespace llvm; |
| 26 | using namespace omp; |
| 27 | using namespace target; |
| 28 | using namespace plugin; |
| 29 | using namespace error; |
| 30 | |
| 31 | Expected<std::unique_ptr<ObjectFile>> |
| 32 | GenericGlobalHandlerTy::getELFObjectFile(DeviceImageTy &Image) { |
| 33 | assert(utils::elf::isELF(Image.getMemoryBuffer().getBuffer()) && |
| 34 | "Input is not an ELF file" ); |
| 35 | |
| 36 | auto Expected = |
| 37 | ELFObjectFileBase::createELFObjectFile(Image.getMemoryBuffer()); |
| 38 | if (!Expected) { |
| 39 | return Plugin::error(ErrorCode::INVALID_BINARY, Expected.takeError(), |
| 40 | "error parsing binary" ); |
| 41 | } |
| 42 | return Expected; |
| 43 | } |
| 44 | |
| 45 | Error GenericGlobalHandlerTy::moveGlobalBetweenDeviceAndHost( |
| 46 | GenericDeviceTy &Device, DeviceImageTy &Image, const GlobalTy &HostGlobal, |
| 47 | bool Device2Host) { |
| 48 | |
| 49 | GlobalTy DeviceGlobal(HostGlobal.getName(), HostGlobal.getSize()); |
| 50 | |
| 51 | // Get the metadata from the global on the device. |
| 52 | if (auto Err = getGlobalMetadataFromDevice(Device, Image, DeviceGlobal)) |
| 53 | return Err; |
| 54 | |
| 55 | // Perform the actual transfer. |
| 56 | return moveGlobalBetweenDeviceAndHost(Device, HostGlobal, DeviceGlobal, |
| 57 | Device2Host); |
| 58 | } |
| 59 | |
| 60 | /// Actually move memory between host and device. See readGlobalFromDevice and |
| 61 | /// writeGlobalToDevice for the interface description. |
| 62 | Error GenericGlobalHandlerTy::moveGlobalBetweenDeviceAndHost( |
| 63 | GenericDeviceTy &Device, const GlobalTy &HostGlobal, |
| 64 | const GlobalTy &DeviceGlobal, bool Device2Host) { |
| 65 | |
| 66 | // Transfer the data from the source to the destination. |
| 67 | if (Device2Host) { |
| 68 | if (auto Err = |
| 69 | Device.dataRetrieve(HostGlobal.getPtr(), DeviceGlobal.getPtr(), |
| 70 | HostGlobal.getSize(), nullptr)) |
| 71 | return Err; |
| 72 | } else { |
| 73 | if (auto Err = Device.dataSubmit(DeviceGlobal.getPtr(), HostGlobal.getPtr(), |
| 74 | HostGlobal.getSize(), nullptr)) |
| 75 | return Err; |
| 76 | } |
| 77 | |
| 78 | DP("Successfully %s %u bytes associated with global symbol '%s' %s the " |
| 79 | "device " |
| 80 | "(%p -> %p).\n" , |
| 81 | Device2Host ? "read" : "write" , HostGlobal.getSize(), |
| 82 | HostGlobal.getName().data(), Device2Host ? "from" : "to" , |
| 83 | DeviceGlobal.getPtr(), HostGlobal.getPtr()); |
| 84 | |
| 85 | return Plugin::success(); |
| 86 | } |
| 87 | |
| 88 | bool GenericGlobalHandlerTy::isSymbolInImage(GenericDeviceTy &Device, |
| 89 | DeviceImageTy &Image, |
| 90 | StringRef SymName) { |
| 91 | // Get the ELF object file for the image. Notice the ELF object may already |
| 92 | // be created in previous calls, so we can reuse it. If this is unsuccessful |
| 93 | // just return false as we couldn't find it. |
| 94 | auto ELFObjOrErr = getELFObjectFile(Image); |
| 95 | if (!ELFObjOrErr) { |
| 96 | consumeError(ELFObjOrErr.takeError()); |
| 97 | return false; |
| 98 | } |
| 99 | |
| 100 | // Search the ELF symbol using the symbol name. |
| 101 | auto SymOrErr = utils::elf::getSymbol(**ELFObjOrErr, SymName); |
| 102 | if (!SymOrErr) { |
| 103 | consumeError(SymOrErr.takeError()); |
| 104 | return false; |
| 105 | } |
| 106 | |
| 107 | return SymOrErr->has_value(); |
| 108 | } |
| 109 | |
| 110 | Error GenericGlobalHandlerTy::getGlobalMetadataFromImage( |
| 111 | GenericDeviceTy &Device, DeviceImageTy &Image, GlobalTy &ImageGlobal) { |
| 112 | |
| 113 | // Get the ELF object file for the image. Notice the ELF object may already |
| 114 | // be created in previous calls, so we can reuse it. |
| 115 | auto ELFObj = getELFObjectFile(Image); |
| 116 | if (!ELFObj) |
| 117 | return ELFObj.takeError(); |
| 118 | |
| 119 | // Search the ELF symbol using the symbol name. |
| 120 | auto SymOrErr = utils::elf::getSymbol(**ELFObj, ImageGlobal.getName()); |
| 121 | if (!SymOrErr) |
| 122 | return Plugin::error( |
| 123 | ErrorCode::NOT_FOUND, "failed ELF lookup of global '%s': %s" , |
| 124 | ImageGlobal.getName().data(), toString(SymOrErr.takeError()).data()); |
| 125 | |
| 126 | if (!SymOrErr->has_value()) |
| 127 | return Plugin::error(ErrorCode::NOT_FOUND, |
| 128 | "failed to find global symbol '%s' in the ELF image" , |
| 129 | ImageGlobal.getName().data()); |
| 130 | |
| 131 | auto AddrOrErr = utils::elf::getSymbolAddress(**SymOrErr); |
| 132 | // Get the section to which the symbol belongs. |
| 133 | if (!AddrOrErr) |
| 134 | return Plugin::error( |
| 135 | ErrorCode::NOT_FOUND, "failed to get ELF symbol from global '%s': %s" , |
| 136 | ImageGlobal.getName().data(), toString(AddrOrErr.takeError()).data()); |
| 137 | |
| 138 | // Setup the global symbol's address and size. |
| 139 | ImageGlobal.setPtr(const_cast<void *>(*AddrOrErr)); |
| 140 | ImageGlobal.setSize((*SymOrErr)->getSize()); |
| 141 | |
| 142 | return Plugin::success(); |
| 143 | } |
| 144 | |
| 145 | Error GenericGlobalHandlerTy::readGlobalFromImage(GenericDeviceTy &Device, |
| 146 | DeviceImageTy &Image, |
| 147 | const GlobalTy &HostGlobal) { |
| 148 | |
| 149 | GlobalTy ImageGlobal(HostGlobal.getName(), -1); |
| 150 | if (auto Err = getGlobalMetadataFromImage(Device, Image, ImageGlobal)) |
| 151 | return Err; |
| 152 | |
| 153 | if (ImageGlobal.getSize() != HostGlobal.getSize()) |
| 154 | return Plugin::error(ErrorCode::INVALID_BINARY, |
| 155 | "transfer failed because global symbol '%s' has " |
| 156 | "%u bytes in the ELF image but %u bytes on the host" , |
| 157 | HostGlobal.getName().data(), ImageGlobal.getSize(), |
| 158 | HostGlobal.getSize()); |
| 159 | |
| 160 | DP("Global symbol '%s' was found in the ELF image and %u bytes will copied " |
| 161 | "from %p to %p.\n" , |
| 162 | HostGlobal.getName().data(), HostGlobal.getSize(), ImageGlobal.getPtr(), |
| 163 | HostGlobal.getPtr()); |
| 164 | |
| 165 | assert(Image.getStart() <= ImageGlobal.getPtr() && |
| 166 | utils::advancePtr(ImageGlobal.getPtr(), ImageGlobal.getSize()) < |
| 167 | utils::advancePtr(Image.getStart(), Image.getSize()) && |
| 168 | "Attempting to read outside the image!" ); |
| 169 | |
| 170 | // Perform the copy from the image to the host memory. |
| 171 | std::memcpy(HostGlobal.getPtr(), ImageGlobal.getPtr(), HostGlobal.getSize()); |
| 172 | |
| 173 | return Plugin::success(); |
| 174 | } |
| 175 | |
| 176 | Expected<GPUProfGlobals> |
| 177 | GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device, |
| 178 | DeviceImageTy &Image) { |
| 179 | GPUProfGlobals DeviceProfileData; |
| 180 | auto ObjFile = getELFObjectFile(Image); |
| 181 | if (!ObjFile) |
| 182 | return ObjFile.takeError(); |
| 183 | |
| 184 | std::unique_ptr<ELFObjectFileBase> ELFObj( |
| 185 | static_cast<ELFObjectFileBase *>(ObjFile->release())); |
| 186 | DeviceProfileData.TargetTriple = ELFObj->makeTriple(); |
| 187 | |
| 188 | // Iterate through elf symbols |
| 189 | for (auto &Sym : ELFObj->symbols()) { |
| 190 | auto NameOrErr = Sym.getName(); |
| 191 | if (!NameOrErr) |
| 192 | return NameOrErr.takeError(); |
| 193 | |
| 194 | // Check if given current global is a profiling global based |
| 195 | // on name |
| 196 | if (*NameOrErr == getInstrProfNamesVarName()) { |
| 197 | // Read in profiled function names from ELF |
| 198 | auto SectionOrErr = Sym.getSection(); |
| 199 | if (!SectionOrErr) |
| 200 | return SectionOrErr.takeError(); |
| 201 | |
| 202 | auto ContentsOrErr = (*SectionOrErr)->getContents(); |
| 203 | if (!ContentsOrErr) |
| 204 | return ContentsOrErr.takeError(); |
| 205 | |
| 206 | SmallVector<uint8_t> NameBytes(ContentsOrErr->bytes()); |
| 207 | DeviceProfileData.NamesData = NameBytes; |
| 208 | } else if (NameOrErr->starts_with(getInstrProfCountersVarPrefix())) { |
| 209 | // Read global variable profiling counts |
| 210 | SmallVector<int64_t> Counts(Sym.getSize() / sizeof(int64_t), 0); |
| 211 | GlobalTy CountGlobal(NameOrErr->str(), Sym.getSize(), Counts.data()); |
| 212 | if (auto Err = readGlobalFromDevice(Device, Image, CountGlobal)) |
| 213 | return Err; |
| 214 | DeviceProfileData.Counts.append(std::move(Counts)); |
| 215 | } else if (NameOrErr->starts_with(getInstrProfDataVarPrefix())) { |
| 216 | // Read profiling data for this global variable |
| 217 | __llvm_profile_data Data{}; |
| 218 | GlobalTy DataGlobal(NameOrErr->str(), Sym.getSize(), &Data); |
| 219 | if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal)) |
| 220 | return Err; |
| 221 | DeviceProfileData.Data.push_back(std::move(Data)); |
| 222 | } else if (*NameOrErr == INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)) { |
| 223 | uint64_t RawVersionData; |
| 224 | GlobalTy RawVersionGlobal(NameOrErr->str(), Sym.getSize(), |
| 225 | &RawVersionData); |
| 226 | if (auto Err = readGlobalFromDevice(Device, Image, RawVersionGlobal)) |
| 227 | return Err; |
| 228 | DeviceProfileData.Version = RawVersionData; |
| 229 | } |
| 230 | } |
| 231 | return DeviceProfileData; |
| 232 | } |
| 233 | |
| 234 | void GPUProfGlobals::dump() const { |
| 235 | outs() << "======= GPU Profile =======\nTarget: " << TargetTriple.str() |
| 236 | << "\n" ; |
| 237 | |
| 238 | outs() << "======== Counters =========\n" ; |
| 239 | for (size_t i = 0; i < Counts.size(); i++) { |
| 240 | if (i > 0 && i % 10 == 0) |
| 241 | outs() << "\n" ; |
| 242 | else if (i != 0) |
| 243 | outs() << " " ; |
| 244 | outs() << Counts[i]; |
| 245 | } |
| 246 | outs() << "\n" ; |
| 247 | |
| 248 | outs() << "========== Data ===========\n" ; |
| 249 | for (const auto &ProfData : Data) { |
| 250 | outs() << "{ " ; |
| 251 | // The ProfData.Name maybe array, eg: NumValueSites[IPVK_Last+1] . |
| 252 | // If we print out it directly, we are accessing out of bound data. |
| 253 | // Skip dumping the array for now. |
| 254 | #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ |
| 255 | if (sizeof(#Name) > 2 && #Name[sizeof(#Name) - 2] == ']') { \ |
| 256 | outs() << "[...] "; \ |
| 257 | } else { \ |
| 258 | outs() << ProfData.Name << " "; \ |
| 259 | } |
| 260 | #include "llvm/ProfileData/InstrProfData.inc" |
| 261 | outs() << "}\n" ; |
| 262 | } |
| 263 | |
| 264 | outs() << "======== Functions ========\n" ; |
| 265 | std::string s; |
| 266 | s.reserve(NamesData.size()); |
| 267 | for (uint8_t Name : NamesData) { |
| 268 | s.push_back((char)Name); |
| 269 | } |
| 270 | |
| 271 | InstrProfSymtab Symtab; |
| 272 | if (Error Err = Symtab.create(StringRef(s))) { |
| 273 | consumeError(std::move(Err)); |
| 274 | } |
| 275 | Symtab.dumpNames(outs()); |
| 276 | outs() << "===========================\n" ; |
| 277 | } |
| 278 | |
| 279 | Error GPUProfGlobals::write() const { |
| 280 | if (!__llvm_write_custom_profile) |
| 281 | return Plugin::error(ErrorCode::INVALID_BINARY, |
| 282 | "could not find symbol __llvm_write_custom_profile. " |
| 283 | "The compiler-rt profiling library must be linked for " |
| 284 | "GPU PGO to work." ); |
| 285 | |
| 286 | size_t DataSize = Data.size() * sizeof(__llvm_profile_data), |
| 287 | CountsSize = Counts.size() * sizeof(int64_t); |
| 288 | __llvm_profile_data *DataBegin, *DataEnd; |
| 289 | char *CountersBegin, *CountersEnd, *NamesBegin, *NamesEnd; |
| 290 | |
| 291 | // Initialize array of contiguous data. We need to make sure each section is |
| 292 | // contiguous so that the PGO library can compute deltas properly |
| 293 | SmallVector<uint8_t> ContiguousData(NamesData.size() + DataSize + CountsSize); |
| 294 | |
| 295 | // Compute region pointers |
| 296 | DataBegin = (__llvm_profile_data *)(ContiguousData.data() + CountsSize); |
| 297 | DataEnd = |
| 298 | (__llvm_profile_data *)(ContiguousData.data() + CountsSize + DataSize); |
| 299 | CountersBegin = (char *)ContiguousData.data(); |
| 300 | CountersEnd = (char *)(ContiguousData.data() + CountsSize); |
| 301 | NamesBegin = (char *)(ContiguousData.data() + CountsSize + DataSize); |
| 302 | NamesEnd = (char *)(ContiguousData.data() + CountsSize + DataSize + |
| 303 | NamesData.size()); |
| 304 | |
| 305 | // Copy data to contiguous buffer |
| 306 | memcpy(DataBegin, Data.data(), DataSize); |
| 307 | memcpy(CountersBegin, Counts.data(), CountsSize); |
| 308 | memcpy(NamesBegin, NamesData.data(), NamesData.size()); |
| 309 | |
| 310 | // Invoke compiler-rt entrypoint |
| 311 | int result = __llvm_write_custom_profile( |
| 312 | TargetTriple.str().c_str(), DataBegin, DataEnd, CountersBegin, |
| 313 | CountersEnd, NamesBegin, NamesEnd, &Version); |
| 314 | if (result != 0) |
| 315 | return Plugin::error(ErrorCode::HOST_IO, |
| 316 | "error writing GPU PGO data to file" ); |
| 317 | |
| 318 | return Plugin::success(); |
| 319 | } |
| 320 | |
| 321 | bool GPUProfGlobals::empty() const { |
| 322 | return Counts.empty() && Data.empty() && NamesData.empty(); |
| 323 | } |
| 324 | |