1//===- llvm-omp-kernel-replay.cpp - Replay OpenMP offload kernel ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is a command line utility to replay the execution of recorded OpenMP
10// offload kernels.
11//
12//===----------------------------------------------------------------------===//
13
#include "omptarget.h"

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MemoryBuffer.h"

#include <cstdint>
#include <cstdlib>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>
23
24using namespace llvm;
25
26cl::OptionCategory ReplayOptions("llvm-omp-kernel-replay Options");
27
28// InputFilename - The filename to read the json description of the kernel.
29static cl::opt<std::string> InputFilename(cl::Positional,
30 cl::desc("<input kernel json file>"),
31 cl::Required);
32
33static cl::opt<bool> VerifyOpt(
34 "verify",
35 cl::desc(
36 "Verify device memory post execution against the original output."),
37 cl::init(Val: false), cl::cat(ReplayOptions));
38
39static cl::opt<bool> SaveOutputOpt(
40 "save-output",
41 cl::desc("Save the device memory output of the replayed kernel execution."),
42 cl::init(Val: false), cl::cat(ReplayOptions));
43
44static cl::opt<unsigned> NumTeamsOpt("num-teams",
45 cl::desc("Set the number of teams."),
46 cl::init(Val: 0), cl::cat(ReplayOptions));
47
48static cl::opt<unsigned> NumThreadsOpt("num-threads",
49 cl::desc("Set the number of threads."),
50 cl::init(Val: 0), cl::cat(ReplayOptions));
51
52static cl::opt<int32_t> DeviceIdOpt("device-id", cl::desc("Set the device id."),
53 cl::init(Val: -1), cl::cat(ReplayOptions));
54
55int main(int argc, char **argv) {
56 cl::HideUnrelatedOptions(Category&: ReplayOptions);
57 cl::ParseCommandLineOptions(argc, argv, Overview: "llvm-omp-kernel-replay\n");
58
59 ErrorOr<std::unique_ptr<MemoryBuffer>> KernelInfoMB =
60 MemoryBuffer::getFile(Filename: InputFilename, /*isText=*/IsText: true,
61 /*RequiresNullTerminator=*/true);
62 if (!KernelInfoMB)
63 report_fatal_error(reason: "Error reading the kernel info json file");
64 Expected<json::Value> JsonKernelInfo =
65 json::parse(JSON: KernelInfoMB.get()->getBuffer());
66 if (auto Err = JsonKernelInfo.takeError())
67 report_fatal_error(reason: "Cannot parse the kernel info json file");
68
69 auto NumTeamsJson =
70 JsonKernelInfo->getAsObject()->getInteger(K: "NumTeamsClause");
71 unsigned NumTeams = (NumTeamsOpt > 0 ? NumTeamsOpt : NumTeamsJson.value());
72 auto NumThreadsJson =
73 JsonKernelInfo->getAsObject()->getInteger(K: "ThreadLimitClause");
74 unsigned NumThreads =
75 (NumThreadsOpt > 0 ? NumThreadsOpt : NumThreadsJson.value());
76 // TODO: Print a warning if number of teams/threads is explicitly set in the
77 // kernel info but overridden through command line options.
78 auto LoopTripCount =
79 JsonKernelInfo->getAsObject()->getInteger(K: "LoopTripCount");
80 auto KernelFunc = JsonKernelInfo->getAsObject()->getString(K: "Name");
81
82 SmallVector<void *> TgtArgs;
83 SmallVector<ptrdiff_t> TgtArgOffsets;
84 auto NumArgs = JsonKernelInfo->getAsObject()->getInteger(K: "NumArgs");
85 auto *TgtArgsArray = JsonKernelInfo->getAsObject()->getArray(K: "ArgPtrs");
86 for (auto It : *TgtArgsArray)
87 TgtArgs.push_back(Elt: reinterpret_cast<void *>(It.getAsInteger().value()));
88 auto *TgtArgOffsetsArray =
89 JsonKernelInfo->getAsObject()->getArray(K: "ArgOffsets");
90 for (auto It : *TgtArgOffsetsArray)
91 TgtArgOffsets.push_back(Elt: static_cast<ptrdiff_t>(It.getAsInteger().value()));
92
93 void *BAllocStart = reinterpret_cast<void *>(
94 JsonKernelInfo->getAsObject()->getInteger(K: "BumpAllocVAStart").value());
95
96 llvm::offloading::EntryTy KernelEntry = {.Reserved: ~0U, .Version: 0, .Kind: 0, .Flags: 0, .Address: nullptr,
97 .SymbolName: nullptr, .Size: 0, .Data: 0, .AuxAddr: nullptr};
98 std::string KernelEntryName = KernelFunc.value().str();
99 KernelEntry.SymbolName = const_cast<char *>(KernelEntryName.c_str());
100 // Anything non-zero works to uniquely identify the kernel.
101 KernelEntry.Address = (void *)0x1;
102
103 ErrorOr<std::unique_ptr<MemoryBuffer>> ImageMB =
104 MemoryBuffer::getFile(Filename: KernelEntryName + ".image", /*isText=*/IsText: false,
105 /*RequiresNullTerminator=*/false);
106 if (!ImageMB)
107 report_fatal_error(reason: "Error reading the kernel image.");
108
109 __tgt_device_image DeviceImage;
110 DeviceImage.ImageStart = const_cast<char *>(ImageMB.get()->getBufferStart());
111 DeviceImage.ImageEnd = const_cast<char *>(ImageMB.get()->getBufferEnd());
112 DeviceImage.EntriesBegin = &KernelEntry;
113 DeviceImage.EntriesEnd = &KernelEntry + 1;
114
115 __tgt_bin_desc Desc;
116 Desc.NumDeviceImages = 1;
117 Desc.HostEntriesBegin = &KernelEntry;
118 Desc.HostEntriesEnd = &KernelEntry + 1;
119 Desc.DeviceImages = &DeviceImage;
120
121 auto DeviceMemorySizeJson =
122 JsonKernelInfo->getAsObject()->getInteger(K: "DeviceMemorySize");
123 // Set device memory size to the ceiling of GB granularity.
124 uint64_t DeviceMemorySize = std::ceil(x: DeviceMemorySizeJson.value());
125
126 auto DeviceIdJson = JsonKernelInfo->getAsObject()->getInteger(K: "DeviceId");
127 // TODO: Print warning if the user overrides the device id in the json file.
128 int32_t DeviceId = (DeviceIdOpt > -1 ? DeviceIdOpt : DeviceIdJson.value());
129
130 // TODO: do we need requires?
131 //__tgt_register_requires(/*Flags=*/1);
132
133 __tgt_register_lib(&Desc);
134
135 uint64_t ReqPtrArgOffset = 0;
136 int Rc = __tgt_activate_record_replay(DeviceId, DeviceMemorySize, BAllocStart,
137 false, VerifyOpt, ReqPtrArgOffset);
138
139 if (Rc != OMP_TGT_SUCCESS) {
140 report_fatal_error(reason: "Cannot activate record replay\n");
141 }
142
143 ErrorOr<std::unique_ptr<MemoryBuffer>> DeviceMemoryMB =
144 MemoryBuffer::getFile(Filename: KernelEntryName + ".memory", /*isText=*/IsText: false,
145 /*RequiresNullTerminator=*/false);
146
147 if (!DeviceMemoryMB)
148 report_fatal_error(reason: "Error reading the kernel input device memory.");
149
150 // On AMD for currently unknown reasons we cannot copy memory mapped data to
151 // device. This is a work-around.
152 uint8_t *recored_data = new uint8_t[DeviceMemoryMB.get()->getBufferSize()];
153 std::memcpy(dest: recored_data,
154 src: const_cast<char *>(DeviceMemoryMB.get()->getBuffer().data()),
155 n: DeviceMemoryMB.get()->getBufferSize());
156
157 // If necessary, adjust pointer arguments.
158 if (ReqPtrArgOffset) {
159 for (auto *&Arg : TgtArgs) {
160 auto ArgInt = uintptr_t(Arg);
161 // Try to find pointer arguments.
162 if (ArgInt < uintptr_t(BAllocStart) ||
163 ArgInt >= uintptr_t(BAllocStart) + DeviceMemorySize)
164 continue;
165 Arg = reinterpret_cast<void *>(ArgInt - ReqPtrArgOffset);
166 }
167 }
168
169 __tgt_target_kernel_replay(
170 /*Loc=*/nullptr, DeviceId, KernelEntry.Address, (char *)recored_data,
171 DeviceMemoryMB.get()->getBufferSize(), TgtArgs.data(),
172 TgtArgOffsets.data(), NumArgs.value(), NumTeams, NumThreads,
173 LoopTripCount.value());
174
175 if (VerifyOpt) {
176 ErrorOr<std::unique_ptr<MemoryBuffer>> OriginalOutputMB =
177 MemoryBuffer::getFile(Filename: KernelEntryName + ".original.output",
178 /*isText=*/IsText: false,
179 /*RequiresNullTerminator=*/false);
180 if (!OriginalOutputMB)
181 report_fatal_error(reason: "Error reading the kernel original output file, make "
182 "sure LIBOMPTARGET_SAVE_OUTPUT is set when recording");
183 ErrorOr<std::unique_ptr<MemoryBuffer>> ReplayOutputMB =
184 MemoryBuffer::getFile(Filename: KernelEntryName + ".replay.output",
185 /*isText=*/IsText: false,
186 /*RequiresNullTerminator=*/false);
187 if (!ReplayOutputMB)
188 report_fatal_error(reason: "Error reading the kernel replay output file");
189
190 StringRef OriginalOutput = OriginalOutputMB.get()->getBuffer();
191 StringRef ReplayOutput = ReplayOutputMB.get()->getBuffer();
192 if (OriginalOutput == ReplayOutput)
193 outs() << "[llvm-omp-kernel-replay] Replay device memory verified!\n";
194 else
195 outs() << "[llvm-omp-kernel-replay] Replay device memory failed to "
196 "verify!\n";
197 }
198
199 delete[] recored_data;
200
201 return 0;
202}
203

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp