1 | //===- llvm-omp-kernel-replay.cpp - Replay OpenMP offload kernel ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This is a command line utility to replay the execution of recorded OpenMP |
10 | // offload kernels. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "omptarget.h" |
15 | |
16 | #include "Shared/PluginAPI.h" |
17 | |
18 | #include "llvm/Support/CommandLine.h" |
19 | #include "llvm/Support/JSON.h" |
20 | #include "llvm/Support/MemoryBuffer.h" |
21 | #include <cstdint> |
22 | #include <cstdlib> |
23 | |
24 | using namespace llvm; |
25 | |
26 | cl::OptionCategory ReplayOptions("llvm-omp-kernel-replay Options" ); |
27 | |
28 | // InputFilename - The filename to read the json description of the kernel. |
29 | static cl::opt<std::string> InputFilename(cl::Positional, |
30 | cl::desc("<input kernel json file>" ), |
31 | cl::Required); |
32 | |
33 | static cl::opt<bool> VerifyOpt( |
34 | "verify" , |
35 | cl::desc( |
36 | "Verify device memory post execution against the original output." ), |
37 | cl::init(Val: false), cl::cat(ReplayOptions)); |
38 | |
39 | static cl::opt<bool> SaveOutputOpt( |
40 | "save-output" , |
41 | cl::desc("Save the device memory output of the replayed kernel execution." ), |
42 | cl::init(Val: false), cl::cat(ReplayOptions)); |
43 | |
44 | static cl::opt<unsigned> NumTeamsOpt("num-teams" , |
45 | cl::desc("Set the number of teams." ), |
46 | cl::init(Val: 0), cl::cat(ReplayOptions)); |
47 | |
48 | static cl::opt<unsigned> NumThreadsOpt("num-threads" , |
49 | cl::desc("Set the number of threads." ), |
50 | cl::init(Val: 0), cl::cat(ReplayOptions)); |
51 | |
52 | static cl::opt<int32_t> DeviceIdOpt("device-id" , cl::desc("Set the device id." ), |
53 | cl::init(Val: -1), cl::cat(ReplayOptions)); |
54 | |
55 | int main(int argc, char **argv) { |
56 | cl::HideUnrelatedOptions(Category&: ReplayOptions); |
57 | cl::ParseCommandLineOptions(argc, argv, Overview: "llvm-omp-kernel-replay\n" ); |
58 | |
59 | ErrorOr<std::unique_ptr<MemoryBuffer>> KernelInfoMB = |
60 | MemoryBuffer::getFile(Filename: InputFilename, /*isText=*/IsText: true, |
61 | /*RequiresNullTerminator=*/true); |
62 | if (!KernelInfoMB) |
63 | report_fatal_error(reason: "Error reading the kernel info json file" ); |
64 | Expected<json::Value> JsonKernelInfo = |
65 | json::parse(JSON: KernelInfoMB.get()->getBuffer()); |
66 | if (auto Err = JsonKernelInfo.takeError()) |
67 | report_fatal_error(reason: "Cannot parse the kernel info json file" ); |
68 | |
69 | auto NumTeamsJson = |
70 | JsonKernelInfo->getAsObject()->getInteger(K: "NumTeamsClause" ); |
71 | unsigned NumTeams = (NumTeamsOpt > 0 ? NumTeamsOpt : NumTeamsJson.value()); |
72 | auto NumThreadsJson = |
73 | JsonKernelInfo->getAsObject()->getInteger(K: "ThreadLimitClause" ); |
74 | unsigned NumThreads = |
75 | (NumThreadsOpt > 0 ? NumThreadsOpt : NumThreadsJson.value()); |
76 | // TODO: Print a warning if number of teams/threads is explicitly set in the |
77 | // kernel info but overriden through command line options. |
78 | auto LoopTripCount = |
79 | JsonKernelInfo->getAsObject()->getInteger(K: "LoopTripCount" ); |
80 | auto KernelFunc = JsonKernelInfo->getAsObject()->getString(K: "Name" ); |
81 | |
82 | SmallVector<void *> TgtArgs; |
83 | SmallVector<ptrdiff_t> TgtArgOffsets; |
84 | auto NumArgs = JsonKernelInfo->getAsObject()->getInteger(K: "NumArgs" ); |
85 | auto *TgtArgsArray = JsonKernelInfo->getAsObject()->getArray(K: "ArgPtrs" ); |
86 | for (auto It : *TgtArgsArray) |
87 | TgtArgs.push_back(Elt: reinterpret_cast<void *>(It.getAsInteger().value())); |
88 | auto *TgtArgOffsetsArray = |
89 | JsonKernelInfo->getAsObject()->getArray(K: "ArgOffsets" ); |
90 | for (auto It : *TgtArgOffsetsArray) |
91 | TgtArgOffsets.push_back(Elt: static_cast<ptrdiff_t>(It.getAsInteger().value())); |
92 | |
93 | void *BAllocStart = reinterpret_cast<void *>( |
94 | JsonKernelInfo->getAsObject()->getInteger(K: "BumpAllocVAStart" ).value()); |
95 | |
96 | __tgt_offload_entry KernelEntry = {nullptr, nullptr, 0, 0, 0}; |
97 | std::string KernelEntryName = KernelFunc.value().str(); |
98 | KernelEntry.name = const_cast<char *>(KernelEntryName.c_str()); |
99 | // Anything non-zero works to uniquely identify the kernel. |
100 | KernelEntry.addr = (void *)0x1; |
101 | |
102 | ErrorOr<std::unique_ptr<MemoryBuffer>> ImageMB = |
103 | MemoryBuffer::getFile(Filename: KernelEntryName + ".image" , /*isText=*/IsText: false, |
104 | /*RequiresNullTerminator=*/false); |
105 | if (!ImageMB) |
106 | report_fatal_error(reason: "Error reading the kernel image." ); |
107 | |
108 | __tgt_device_image DeviceImage; |
109 | DeviceImage.ImageStart = const_cast<char *>(ImageMB.get()->getBufferStart()); |
110 | DeviceImage.ImageEnd = const_cast<char *>(ImageMB.get()->getBufferEnd()); |
111 | DeviceImage.EntriesBegin = &KernelEntry; |
112 | DeviceImage.EntriesEnd = &KernelEntry + 1; |
113 | |
114 | __tgt_bin_desc Desc; |
115 | Desc.NumDeviceImages = 1; |
116 | Desc.HostEntriesBegin = &KernelEntry; |
117 | Desc.HostEntriesEnd = &KernelEntry + 1; |
118 | Desc.DeviceImages = &DeviceImage; |
119 | |
120 | auto DeviceMemorySizeJson = |
121 | JsonKernelInfo->getAsObject()->getInteger(K: "DeviceMemorySize" ); |
122 | // Set device memory size to the ceiling of GB granularity. |
123 | uint64_t DeviceMemorySize = std::ceil(x: DeviceMemorySizeJson.value()); |
124 | |
125 | auto DeviceIdJson = JsonKernelInfo->getAsObject()->getInteger(K: "DeviceId" ); |
126 | // TODO: Print warning if the user overrides the device id in the json file. |
127 | int32_t DeviceId = (DeviceIdOpt > -1 ? DeviceIdOpt : DeviceIdJson.value()); |
128 | |
129 | // TODO: do we need requires? |
130 | //__tgt_register_requires(/*Flags=*/1); |
131 | |
132 | __tgt_register_lib(&Desc); |
133 | |
134 | uint64_t ReqPtrArgOffset = 0; |
135 | int Rc = __tgt_activate_record_replay(DeviceId, DeviceMemorySize, BAllocStart, |
136 | false, VerifyOpt, ReqPtrArgOffset); |
137 | |
138 | if (Rc != OMP_TGT_SUCCESS) { |
139 | report_fatal_error(reason: "Cannot activate record replay\n" ); |
140 | } |
141 | |
142 | ErrorOr<std::unique_ptr<MemoryBuffer>> DeviceMemoryMB = |
143 | MemoryBuffer::getFile(Filename: KernelEntryName + ".memory" , /*isText=*/IsText: false, |
144 | /*RequiresNullTerminator=*/false); |
145 | |
146 | if (!DeviceMemoryMB) |
147 | report_fatal_error(reason: "Error reading the kernel input device memory." ); |
148 | |
149 | // On AMD for currently unknown reasons we cannot copy memory mapped data to |
150 | // device. This is a work-around. |
151 | uint8_t *recored_data = new uint8_t[DeviceMemoryMB.get()->getBufferSize()]; |
152 | std::memcpy(dest: recored_data, |
153 | src: const_cast<char *>(DeviceMemoryMB.get()->getBuffer().data()), |
154 | n: DeviceMemoryMB.get()->getBufferSize()); |
155 | |
156 | // If necessary, adjust pointer arguments. |
157 | if (ReqPtrArgOffset) { |
158 | for (auto *&Arg : TgtArgs) { |
159 | auto ArgInt = uintptr_t(Arg); |
160 | // Try to find pointer arguments. |
161 | if (ArgInt < uintptr_t(BAllocStart) || |
162 | ArgInt >= uintptr_t(BAllocStart) + DeviceMemorySize) |
163 | continue; |
164 | Arg = reinterpret_cast<void *>(ArgInt - ReqPtrArgOffset); |
165 | } |
166 | } |
167 | |
168 | __tgt_target_kernel_replay( |
169 | /*Loc=*/nullptr, DeviceId, KernelEntry.addr, (char *)recored_data, |
170 | DeviceMemoryMB.get()->getBufferSize(), TgtArgs.data(), |
171 | TgtArgOffsets.data(), NumArgs.value(), NumTeams, NumThreads, |
172 | LoopTripCount.value()); |
173 | |
174 | if (VerifyOpt) { |
175 | ErrorOr<std::unique_ptr<MemoryBuffer>> OriginalOutputMB = |
176 | MemoryBuffer::getFile(Filename: KernelEntryName + ".original.output" , |
177 | /*isText=*/IsText: false, |
178 | /*RequiresNullTerminator=*/false); |
179 | if (!OriginalOutputMB) |
180 | report_fatal_error(reason: "Error reading the kernel original output file, make " |
181 | "sure LIBOMPTARGET_SAVE_OUTPUT is set when recording" ); |
182 | ErrorOr<std::unique_ptr<MemoryBuffer>> ReplayOutputMB = |
183 | MemoryBuffer::getFile(Filename: KernelEntryName + ".replay.output" , |
184 | /*isText=*/IsText: false, |
185 | /*RequiresNullTerminator=*/false); |
186 | if (!ReplayOutputMB) |
187 | report_fatal_error(reason: "Error reading the kernel replay output file" ); |
188 | |
189 | StringRef OriginalOutput = OriginalOutputMB.get()->getBuffer(); |
190 | StringRef ReplayOutput = ReplayOutputMB.get()->getBuffer(); |
191 | if (OriginalOutput == ReplayOutput) |
192 | outs() << "[llvm-omp-kernel-replay] Replay device memory verified!\n" ; |
193 | else |
194 | outs() << "[llvm-omp-kernel-replay] Replay device memory failed to " |
195 | "verify!\n" ; |
196 | } |
197 | |
198 | delete[] recored_data; |
199 | |
200 | return 0; |
201 | } |
202 | |