| 1 | //===- llvm-omp-kernel-replay.cpp - Replay OpenMP offload kernel ----------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This is a command line utility to replay the execution of recorded OpenMP |
| 10 | // offload kernels. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
#include "omptarget.h"

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

#include <cstdint>
#include <cstdlib>
#include <optional>
#include <string>
| 23 | |
| 24 | using namespace llvm; |
| 25 | |
| 26 | cl::OptionCategory ReplayOptions("llvm-omp-kernel-replay Options" ); |
| 27 | |
| 28 | // InputFilename - The filename to read the json description of the kernel. |
| 29 | static cl::opt<std::string> InputFilename(cl::Positional, |
| 30 | cl::desc("<input kernel json file>" ), |
| 31 | cl::Required); |
| 32 | |
| 33 | static cl::opt<bool> VerifyOpt( |
| 34 | "verify" , |
| 35 | cl::desc( |
| 36 | "Verify device memory post execution against the original output." ), |
| 37 | cl::init(Val: false), cl::cat(ReplayOptions)); |
| 38 | |
| 39 | static cl::opt<bool> SaveOutputOpt( |
| 40 | "save-output" , |
| 41 | cl::desc("Save the device memory output of the replayed kernel execution." ), |
| 42 | cl::init(Val: false), cl::cat(ReplayOptions)); |
| 43 | |
| 44 | static cl::opt<unsigned> NumTeamsOpt("num-teams" , |
| 45 | cl::desc("Set the number of teams." ), |
| 46 | cl::init(Val: 0), cl::cat(ReplayOptions)); |
| 47 | |
| 48 | static cl::opt<unsigned> NumThreadsOpt("num-threads" , |
| 49 | cl::desc("Set the number of threads." ), |
| 50 | cl::init(Val: 0), cl::cat(ReplayOptions)); |
| 51 | |
| 52 | static cl::opt<int32_t> DeviceIdOpt("device-id" , cl::desc("Set the device id." ), |
| 53 | cl::init(Val: -1), cl::cat(ReplayOptions)); |
| 54 | |
| 55 | int main(int argc, char **argv) { |
| 56 | cl::HideUnrelatedOptions(Category&: ReplayOptions); |
| 57 | cl::ParseCommandLineOptions(argc, argv, Overview: "llvm-omp-kernel-replay\n" ); |
| 58 | |
| 59 | ErrorOr<std::unique_ptr<MemoryBuffer>> KernelInfoMB = |
| 60 | MemoryBuffer::getFile(Filename: InputFilename, /*isText=*/IsText: true, |
| 61 | /*RequiresNullTerminator=*/true); |
| 62 | if (!KernelInfoMB) |
| 63 | report_fatal_error(reason: "Error reading the kernel info json file" ); |
| 64 | Expected<json::Value> JsonKernelInfo = |
| 65 | json::parse(JSON: KernelInfoMB.get()->getBuffer()); |
| 66 | if (auto Err = JsonKernelInfo.takeError()) |
| 67 | report_fatal_error(reason: "Cannot parse the kernel info json file" ); |
| 68 | |
| 69 | auto NumTeamsJson = |
| 70 | JsonKernelInfo->getAsObject()->getInteger(K: "NumTeamsClause" ); |
| 71 | unsigned NumTeams = (NumTeamsOpt > 0 ? NumTeamsOpt : NumTeamsJson.value()); |
| 72 | auto NumThreadsJson = |
| 73 | JsonKernelInfo->getAsObject()->getInteger(K: "ThreadLimitClause" ); |
| 74 | unsigned NumThreads = |
| 75 | (NumThreadsOpt > 0 ? NumThreadsOpt : NumThreadsJson.value()); |
| 76 | // TODO: Print a warning if number of teams/threads is explicitly set in the |
| 77 | // kernel info but overridden through command line options. |
| 78 | auto LoopTripCount = |
| 79 | JsonKernelInfo->getAsObject()->getInteger(K: "LoopTripCount" ); |
| 80 | auto KernelFunc = JsonKernelInfo->getAsObject()->getString(K: "Name" ); |
| 81 | |
| 82 | SmallVector<void *> TgtArgs; |
| 83 | SmallVector<ptrdiff_t> TgtArgOffsets; |
| 84 | auto NumArgs = JsonKernelInfo->getAsObject()->getInteger(K: "NumArgs" ); |
| 85 | auto *TgtArgsArray = JsonKernelInfo->getAsObject()->getArray(K: "ArgPtrs" ); |
| 86 | for (auto It : *TgtArgsArray) |
| 87 | TgtArgs.push_back(Elt: reinterpret_cast<void *>(It.getAsInteger().value())); |
| 88 | auto *TgtArgOffsetsArray = |
| 89 | JsonKernelInfo->getAsObject()->getArray(K: "ArgOffsets" ); |
| 90 | for (auto It : *TgtArgOffsetsArray) |
| 91 | TgtArgOffsets.push_back(Elt: static_cast<ptrdiff_t>(It.getAsInteger().value())); |
| 92 | |
| 93 | void *BAllocStart = reinterpret_cast<void *>( |
| 94 | JsonKernelInfo->getAsObject()->getInteger(K: "BumpAllocVAStart" ).value()); |
| 95 | |
| 96 | llvm::offloading::EntryTy KernelEntry = {.Reserved: ~0U, .Version: 0, .Kind: 0, .Flags: 0, .Address: nullptr, |
| 97 | .SymbolName: nullptr, .Size: 0, .Data: 0, .AuxAddr: nullptr}; |
| 98 | std::string KernelEntryName = KernelFunc.value().str(); |
| 99 | KernelEntry.SymbolName = const_cast<char *>(KernelEntryName.c_str()); |
| 100 | // Anything non-zero works to uniquely identify the kernel. |
| 101 | KernelEntry.Address = (void *)0x1; |
| 102 | |
| 103 | ErrorOr<std::unique_ptr<MemoryBuffer>> ImageMB = |
| 104 | MemoryBuffer::getFile(Filename: KernelEntryName + ".image" , /*isText=*/IsText: false, |
| 105 | /*RequiresNullTerminator=*/false); |
| 106 | if (!ImageMB) |
| 107 | report_fatal_error(reason: "Error reading the kernel image." ); |
| 108 | |
| 109 | __tgt_device_image DeviceImage; |
| 110 | DeviceImage.ImageStart = const_cast<char *>(ImageMB.get()->getBufferStart()); |
| 111 | DeviceImage.ImageEnd = const_cast<char *>(ImageMB.get()->getBufferEnd()); |
| 112 | DeviceImage.EntriesBegin = &KernelEntry; |
| 113 | DeviceImage.EntriesEnd = &KernelEntry + 1; |
| 114 | |
| 115 | __tgt_bin_desc Desc; |
| 116 | Desc.NumDeviceImages = 1; |
| 117 | Desc.HostEntriesBegin = &KernelEntry; |
| 118 | Desc.HostEntriesEnd = &KernelEntry + 1; |
| 119 | Desc.DeviceImages = &DeviceImage; |
| 120 | |
| 121 | auto DeviceMemorySizeJson = |
| 122 | JsonKernelInfo->getAsObject()->getInteger(K: "DeviceMemorySize" ); |
| 123 | // Set device memory size to the ceiling of GB granularity. |
| 124 | uint64_t DeviceMemorySize = std::ceil(x: DeviceMemorySizeJson.value()); |
| 125 | |
| 126 | auto DeviceIdJson = JsonKernelInfo->getAsObject()->getInteger(K: "DeviceId" ); |
| 127 | // TODO: Print warning if the user overrides the device id in the json file. |
| 128 | int32_t DeviceId = (DeviceIdOpt > -1 ? DeviceIdOpt : DeviceIdJson.value()); |
| 129 | |
| 130 | // TODO: do we need requires? |
| 131 | //__tgt_register_requires(/*Flags=*/1); |
| 132 | |
| 133 | __tgt_register_lib(&Desc); |
| 134 | |
| 135 | uint64_t ReqPtrArgOffset = 0; |
| 136 | int Rc = __tgt_activate_record_replay(DeviceId, DeviceMemorySize, BAllocStart, |
| 137 | false, VerifyOpt, ReqPtrArgOffset); |
| 138 | |
| 139 | if (Rc != OMP_TGT_SUCCESS) { |
| 140 | report_fatal_error(reason: "Cannot activate record replay\n" ); |
| 141 | } |
| 142 | |
| 143 | ErrorOr<std::unique_ptr<MemoryBuffer>> DeviceMemoryMB = |
| 144 | MemoryBuffer::getFile(Filename: KernelEntryName + ".memory" , /*isText=*/IsText: false, |
| 145 | /*RequiresNullTerminator=*/false); |
| 146 | |
| 147 | if (!DeviceMemoryMB) |
| 148 | report_fatal_error(reason: "Error reading the kernel input device memory." ); |
| 149 | |
| 150 | // On AMD for currently unknown reasons we cannot copy memory mapped data to |
| 151 | // device. This is a work-around. |
| 152 | uint8_t *recored_data = new uint8_t[DeviceMemoryMB.get()->getBufferSize()]; |
| 153 | std::memcpy(dest: recored_data, |
| 154 | src: const_cast<char *>(DeviceMemoryMB.get()->getBuffer().data()), |
| 155 | n: DeviceMemoryMB.get()->getBufferSize()); |
| 156 | |
| 157 | // If necessary, adjust pointer arguments. |
| 158 | if (ReqPtrArgOffset) { |
| 159 | for (auto *&Arg : TgtArgs) { |
| 160 | auto ArgInt = uintptr_t(Arg); |
| 161 | // Try to find pointer arguments. |
| 162 | if (ArgInt < uintptr_t(BAllocStart) || |
| 163 | ArgInt >= uintptr_t(BAllocStart) + DeviceMemorySize) |
| 164 | continue; |
| 165 | Arg = reinterpret_cast<void *>(ArgInt - ReqPtrArgOffset); |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | __tgt_target_kernel_replay( |
| 170 | /*Loc=*/nullptr, DeviceId, KernelEntry.Address, (char *)recored_data, |
| 171 | DeviceMemoryMB.get()->getBufferSize(), TgtArgs.data(), |
| 172 | TgtArgOffsets.data(), NumArgs.value(), NumTeams, NumThreads, |
| 173 | LoopTripCount.value()); |
| 174 | |
| 175 | if (VerifyOpt) { |
| 176 | ErrorOr<std::unique_ptr<MemoryBuffer>> OriginalOutputMB = |
| 177 | MemoryBuffer::getFile(Filename: KernelEntryName + ".original.output" , |
| 178 | /*isText=*/IsText: false, |
| 179 | /*RequiresNullTerminator=*/false); |
| 180 | if (!OriginalOutputMB) |
| 181 | report_fatal_error(reason: "Error reading the kernel original output file, make " |
| 182 | "sure LIBOMPTARGET_SAVE_OUTPUT is set when recording" ); |
| 183 | ErrorOr<std::unique_ptr<MemoryBuffer>> ReplayOutputMB = |
| 184 | MemoryBuffer::getFile(Filename: KernelEntryName + ".replay.output" , |
| 185 | /*isText=*/IsText: false, |
| 186 | /*RequiresNullTerminator=*/false); |
| 187 | if (!ReplayOutputMB) |
| 188 | report_fatal_error(reason: "Error reading the kernel replay output file" ); |
| 189 | |
| 190 | StringRef OriginalOutput = OriginalOutputMB.get()->getBuffer(); |
| 191 | StringRef ReplayOutput = ReplayOutputMB.get()->getBuffer(); |
| 192 | if (OriginalOutput == ReplayOutput) |
| 193 | outs() << "[llvm-omp-kernel-replay] Replay device memory verified!\n" ; |
| 194 | else |
| 195 | outs() << "[llvm-omp-kernel-replay] Replay device memory failed to " |
| 196 | "verify!\n" ; |
| 197 | } |
| 198 | |
| 199 | delete[] recored_data; |
| 200 | |
| 201 | return 0; |
| 202 | } |
| 203 | |