1 | //===--------- device.cpp - Target independent OpenMP target RTL ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Functionality for managing devices that are handled by RTL plugins. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "device.h" |
14 | #include "OffloadEntry.h" |
15 | #include "OpenMP/Mapping.h" |
16 | #include "OpenMP/OMPT/Callback.h" |
17 | #include "OpenMP/OMPT/Interface.h" |
18 | #include "PluginManager.h" |
19 | #include "Shared/APITypes.h" |
20 | #include "Shared/Debug.h" |
21 | #include "omptarget.h" |
22 | #include "private.h" |
23 | #include "rtl.h" |
24 | |
25 | #include "Shared/EnvironmentVar.h" |
26 | #include "llvm/Support/Error.h" |
27 | |
28 | #include <cassert> |
29 | #include <climits> |
30 | #include <cstdint> |
31 | #include <cstdio> |
32 | #include <mutex> |
33 | #include <string> |
34 | #include <thread> |
35 | |
36 | #ifdef OMPT_SUPPORT |
37 | using namespace llvm::omp::target::ompt; |
38 | #endif |
39 | |
40 | int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device, |
41 | AsyncInfoTy &AsyncInfo) const { |
42 | // First, check if the user disabled atomic map transfer/malloc/dealloc. |
43 | if (!MappingConfig::get().UseEventsForAtomicTransfers) |
44 | return OFFLOAD_SUCCESS; |
45 | |
46 | void *Event = getEvent(); |
47 | bool NeedNewEvent = Event == nullptr; |
48 | if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) { |
49 | REPORT("Failed to create event\n" ); |
50 | return OFFLOAD_FAIL; |
51 | } |
52 | |
53 | // We cannot assume the event should not be nullptr because we don't |
54 | // know if the target support event. But if a target doesn't, |
55 | // recordEvent should always return success. |
56 | if (Device.recordEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { |
57 | REPORT("Failed to set dependence on event " DPxMOD "\n" , DPxPTR(Event)); |
58 | return OFFLOAD_FAIL; |
59 | } |
60 | |
61 | if (NeedNewEvent) |
62 | setEvent(Event); |
63 | |
64 | return OFFLOAD_SUCCESS; |
65 | } |
66 | |
/// Construct a device object.
///
/// Stores \p DeviceID, the owning plugin adaptor \p RTL and \p RTLDeviceID
/// (the ID forwarded to the plugin in this file's RTL-> calls), and constructs
/// MappingInfo from a reference to this device. No plugin call is made here;
/// see init().
DeviceTy::DeviceTy(PluginAdaptorTy *RTL, int32_t DeviceID, int32_t RTLDeviceID)
    : DeviceID(DeviceID), RTL(RTL), RTLDeviceID(RTLDeviceID),
      MappingInfo(*this) {}
70 | |
71 | DeviceTy::~DeviceTy() { |
72 | if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)) |
73 | return; |
74 | |
75 | ident_t Loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;" }; |
76 | dumpTargetPointerMappings(&Loc, *this); |
77 | } |
78 | |
79 | llvm::Error DeviceTy::init() { |
80 | // Make call to init_requires if it exists for this plugin. |
81 | int32_t Ret = 0; |
82 | Ret = RTL->init_requires(PM->getRequirements()); |
83 | if (Ret != OFFLOAD_SUCCESS) |
84 | return llvm::createStringError( |
85 | llvm::inconvertibleErrorCode(), |
86 | "Failed to initialize requirements for device %d\n" , DeviceID); |
87 | |
88 | Ret = RTL->init_device(RTLDeviceID); |
89 | if (Ret != OFFLOAD_SUCCESS) |
90 | return llvm::createStringError(llvm::inconvertibleErrorCode(), |
91 | "Failed to initialize device %d\n" , |
92 | DeviceID); |
93 | |
94 | // Enables recording kernels if set. |
95 | BoolEnvar OMPX_RecordKernel("LIBOMPTARGET_RECORD" , false); |
96 | if (OMPX_RecordKernel) { |
97 | // Enables saving the device memory kernel output post execution if set. |
98 | BoolEnvar OMPX_ReplaySaveOutput("LIBOMPTARGET_RR_SAVE_OUTPUT" , false); |
99 | |
100 | uint64_t ReqPtrArgOffset; |
101 | RTL->initialize_record_replay(RTLDeviceID, 0, nullptr, true, |
102 | OMPX_ReplaySaveOutput, ReqPtrArgOffset); |
103 | } |
104 | |
105 | return llvm::Error::success(); |
106 | } |
107 | |
108 | // Load binary to device. |
109 | llvm::Expected<__tgt_device_binary> |
110 | DeviceTy::loadBinary(__tgt_device_image *Img) { |
111 | __tgt_device_binary Binary; |
112 | |
113 | if (RTL->load_binary(RTLDeviceID, Img, &Binary) != OFFLOAD_SUCCESS) |
114 | return llvm::createStringError(llvm::inconvertibleErrorCode(), |
115 | "Failed to load binary %p" , Img); |
116 | return Binary; |
117 | } |
118 | |
119 | void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) { |
120 | /// RAII to establish tool anchors before and after data allocation |
121 | void *TargetPtr = nullptr; |
122 | OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII( |
123 | RegionInterface.getCallbacks<ompt_target_data_alloc>(), |
124 | DeviceID, HstPtr, &TargetPtr, Size, |
125 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
126 | |
127 | TargetPtr = RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); |
128 | return TargetPtr; |
129 | } |
130 | |
131 | int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) { |
132 | /// RAII to establish tool anchors before and after data deletion |
133 | OMPT_IF_BUILT(InterfaceRAII TargetDataDeleteRAII( |
134 | RegionInterface.getCallbacks<ompt_target_data_delete>(), |
135 | DeviceID, TgtAllocBegin, |
136 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
137 | |
138 | return RTL->data_delete(RTLDeviceID, TgtAllocBegin, Kind); |
139 | } |
140 | |
141 | // Submit data to device |
142 | int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, |
143 | AsyncInfoTy &AsyncInfo, HostDataToTargetTy *Entry, |
144 | MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr) { |
145 | if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) |
146 | MappingInfo.printCopyInfo(TgtPtrBegin, HstPtrBegin, Size, /*H2D=*/true, |
147 | Entry, HDTTMapPtr); |
148 | |
149 | /// RAII to establish tool anchors before and after data submit |
150 | OMPT_IF_BUILT( |
151 | InterfaceRAII TargetDataSubmitRAII( |
152 | RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(), |
153 | omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size, |
154 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
155 | |
156 | return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, |
157 | AsyncInfo); |
158 | } |
159 | |
160 | // Retrieve data from device |
161 | int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin, |
162 | int64_t Size, AsyncInfoTy &AsyncInfo, |
163 | HostDataToTargetTy *Entry, |
164 | MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr) { |
165 | if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) |
166 | MappingInfo.printCopyInfo(TgtPtrBegin, HstPtrBegin, Size, /*H2D=*/false, |
167 | Entry, HDTTMapPtr); |
168 | |
169 | /// RAII to establish tool anchors before and after data retrieval |
170 | OMPT_IF_BUILT( |
171 | InterfaceRAII TargetDataRetrieveRAII( |
172 | RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(), |
173 | DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size, |
174 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
175 | |
176 | return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, |
177 | AsyncInfo); |
178 | } |
179 | |
180 | // Copy data from current device to destination device directly |
181 | int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, |
182 | int64_t Size, AsyncInfoTy &AsyncInfo) { |
183 | /// RAII to establish tool anchors before and after data exchange |
184 | /// Note: Despite the fact that this is a data exchange, we use 'from_device' |
185 | /// operation enum (w.r.t. ompt_target_data_op_t) as there is currently |
186 | /// no better alternative. It is still possible to distinguish this |
187 | /// scenario from a real data retrieve by checking if both involved |
188 | /// device numbers are less than omp_get_num_devices(). |
189 | OMPT_IF_BUILT( |
190 | InterfaceRAII TargetDataExchangeRAII( |
191 | RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(), |
192 | RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size, |
193 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
194 | if (!AsyncInfo) { |
195 | assert(RTL->data_exchange && "RTL->data_exchange is nullptr" ); |
196 | return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, |
197 | Size); |
198 | } |
199 | return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, |
200 | DstPtr, Size, AsyncInfo); |
201 | } |
202 | |
203 | int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) { |
204 | DP("Notifying about new mapping: HstPtr=" DPxMOD ", Size=%" PRId64 "\n" , |
205 | DPxPTR(HstPtr), Size); |
206 | |
207 | if (RTL->data_notify_mapped(RTLDeviceID, HstPtr, Size)) { |
208 | REPORT("Notifiying about data mapping failed.\n" ); |
209 | return OFFLOAD_FAIL; |
210 | } |
211 | return OFFLOAD_SUCCESS; |
212 | } |
213 | |
214 | int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) { |
215 | DP("Notifying about an unmapping: HstPtr=" DPxMOD "\n" , DPxPTR(HstPtr)); |
216 | |
217 | if (RTL->data_notify_unmapped(RTLDeviceID, HstPtr)) { |
218 | REPORT("Notifiying about data unmapping failed.\n" ); |
219 | return OFFLOAD_FAIL; |
220 | } |
221 | return OFFLOAD_SUCCESS; |
222 | } |
223 | |
224 | // Run region on device |
225 | int32_t DeviceTy::launchKernel(void *TgtEntryPtr, void **TgtVarsPtr, |
226 | ptrdiff_t *TgtOffsets, KernelArgsTy &KernelArgs, |
227 | AsyncInfoTy &AsyncInfo) { |
228 | return RTL->launch_kernel(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets, |
229 | &KernelArgs, AsyncInfo); |
230 | } |
231 | |
// Ask the plugin to print information about this device
// (print_device_info). Always returns true; the plugin call's result, if any,
// is not inspected.
bool DeviceTy::printDeviceInfo() {
  RTL->print_device_info(RTLDeviceID);
  return true;
}
237 | |
238 | // Whether data can be copied to DstDevice directly |
239 | bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) { |
240 | if (RTL != DstDevice.RTL) |
241 | return false; |
242 | |
243 | if (RTL->is_data_exchangable(RTLDeviceID, DstDevice.RTLDeviceID)) |
244 | return true; |
245 | return false; |
246 | } |
247 | |
248 | int32_t DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) { |
249 | return RTL->synchronize(RTLDeviceID, AsyncInfo); |
250 | } |
251 | |
252 | int32_t DeviceTy::queryAsync(AsyncInfoTy &AsyncInfo) { |
253 | return RTL->query_async(RTLDeviceID, AsyncInfo); |
254 | } |
255 | |
256 | int32_t DeviceTy::createEvent(void **Event) { |
257 | return RTL->create_event(RTLDeviceID, Event); |
258 | } |
259 | |
260 | int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) { |
261 | return RTL->record_event(RTLDeviceID, Event, AsyncInfo); |
262 | } |
263 | |
264 | int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) { |
265 | return RTL->wait_event(RTLDeviceID, Event, AsyncInfo); |
266 | } |
267 | |
268 | int32_t DeviceTy::syncEvent(void *Event) { |
269 | return RTL->sync_event(RTLDeviceID, Event); |
270 | } |
271 | |
272 | int32_t DeviceTy::destroyEvent(void *Event) { |
273 | return RTL->destroy_event(RTLDeviceID, Event); |
274 | } |
275 | |
276 | void DeviceTy::dumpOffloadEntries() { |
277 | fprintf(stderr, "Device %i offload entries:\n" , DeviceID); |
278 | for (auto &It : *DeviceOffloadEntries.getExclusiveAccessor()) { |
279 | const char *Kind = "kernel" ; |
280 | if (It.second.isLink()) |
281 | Kind = "link" ; |
282 | else if (It.second.isGlobal()) |
283 | Kind = "global var." ; |
284 | fprintf(stderr, " %11s: %s\n" , Kind, It.second.getNameAsCStr()); |
285 | } |
286 | } |
287 | |
288 | bool DeviceTy::useAutoZeroCopy() { |
289 | return RTL->use_auto_zero_copy(RTLDeviceID); |
290 | } |
291 | |