1 | //===--------- device.cpp - Target independent OpenMP target RTL ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Functionality for managing devices that are handled by RTL plugins. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "device.h" |
14 | #include "OffloadEntry.h" |
15 | #include "OpenMP/Mapping.h" |
16 | #include "OpenMP/OMPT/Callback.h" |
17 | #include "OpenMP/OMPT/Interface.h" |
18 | #include "PluginManager.h" |
19 | #include "Shared/APITypes.h" |
20 | #include "Shared/Debug.h" |
21 | #include "omptarget.h" |
22 | #include "private.h" |
23 | #include "rtl.h" |
24 | |
25 | #include "Shared/EnvironmentVar.h" |
26 | #include "llvm/Support/Error.h" |
27 | |
28 | #include <cassert> |
29 | #include <climits> |
30 | #include <cstdint> |
31 | #include <cstdio> |
32 | #include <mutex> |
33 | #include <string> |
34 | #include <thread> |
35 | |
36 | #ifdef OMPT_SUPPORT |
37 | using namespace llvm::omp::target::ompt; |
38 | #endif |
39 | |
40 | int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device, |
41 | AsyncInfoTy &AsyncInfo) const { |
42 | // First, check if the user disabled atomic map transfer/malloc/dealloc. |
43 | if (!MappingConfig::get().UseEventsForAtomicTransfers) |
44 | return OFFLOAD_SUCCESS; |
45 | |
46 | void *Event = getEvent(); |
47 | bool NeedNewEvent = Event == nullptr; |
48 | if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) { |
49 | REPORT("Failed to create event\n" ); |
50 | return OFFLOAD_FAIL; |
51 | } |
52 | |
53 | // We cannot assume the event should not be nullptr because we don't |
54 | // know if the target support event. But if a target doesn't, |
55 | // recordEvent should always return success. |
56 | if (Device.recordEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { |
57 | REPORT("Failed to set dependence on event " DPxMOD "\n" , DPxPTR(Event)); |
58 | return OFFLOAD_FAIL; |
59 | } |
60 | |
61 | if (NeedNewEvent) |
62 | setEvent(Event); |
63 | |
64 | return OFFLOAD_SUCCESS; |
65 | } |
66 | |
67 | DeviceTy::DeviceTy(GenericPluginTy *RTL, int32_t DeviceID, int32_t RTLDeviceID) |
68 | : DeviceID(DeviceID), RTL(RTL), RTLDeviceID(RTLDeviceID), |
69 | MappingInfo(*this) {} |
70 | |
71 | DeviceTy::~DeviceTy() { |
72 | if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)) |
73 | return; |
74 | |
75 | ident_t Loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;" }; |
76 | dumpTargetPointerMappings(&Loc, *this); |
77 | } |
78 | |
79 | llvm::Error DeviceTy::init() { |
80 | int32_t Ret = RTL->init_device(RTLDeviceID); |
81 | if (Ret != OFFLOAD_SUCCESS) |
82 | return error::createOffloadError(error::ErrorCode::BACKEND_FAILURE, |
83 | "failed to initialize device %d\n" , |
84 | DeviceID); |
85 | |
86 | // Enables recording kernels if set. |
87 | BoolEnvar OMPX_RecordKernel("LIBOMPTARGET_RECORD" , false); |
88 | if (OMPX_RecordKernel) { |
89 | // Enables saving the device memory kernel output post execution if set. |
90 | BoolEnvar OMPX_ReplaySaveOutput("LIBOMPTARGET_RR_SAVE_OUTPUT" , false); |
91 | |
92 | uint64_t ReqPtrArgOffset; |
93 | RTL->initialize_record_replay(RTLDeviceID, 0, nullptr, true, |
94 | OMPX_ReplaySaveOutput, ReqPtrArgOffset); |
95 | } |
96 | |
97 | return llvm::Error::success(); |
98 | } |
99 | |
100 | // Load binary to device. |
101 | llvm::Expected<__tgt_device_binary> |
102 | DeviceTy::loadBinary(__tgt_device_image *Img) { |
103 | __tgt_device_binary Binary; |
104 | |
105 | if (RTL->load_binary(RTLDeviceID, Img, &Binary) != OFFLOAD_SUCCESS) |
106 | return error::createOffloadError(error::ErrorCode::INVALID_BINARY, |
107 | "failed to load binary %p" , Img); |
108 | return Binary; |
109 | } |
110 | |
111 | void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) { |
112 | /// RAII to establish tool anchors before and after data allocation |
113 | void *TargetPtr = nullptr; |
114 | OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII( |
115 | RegionInterface.getCallbacks<ompt_target_data_alloc>(), |
116 | DeviceID, HstPtr, &TargetPtr, Size, |
117 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
118 | |
119 | TargetPtr = RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); |
120 | return TargetPtr; |
121 | } |
122 | |
123 | int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) { |
124 | /// RAII to establish tool anchors before and after data deletion |
125 | OMPT_IF_BUILT(InterfaceRAII TargetDataDeleteRAII( |
126 | RegionInterface.getCallbacks<ompt_target_data_delete>(), |
127 | DeviceID, TgtAllocBegin, |
128 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
129 | |
130 | return RTL->data_delete(RTLDeviceID, TgtAllocBegin, Kind); |
131 | } |
132 | |
133 | // Submit data to device |
134 | int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, |
135 | AsyncInfoTy &AsyncInfo, HostDataToTargetTy *Entry, |
136 | MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr) { |
137 | if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) |
138 | MappingInfo.printCopyInfo(TgtPtrBegin, HstPtrBegin, Size, /*H2D=*/true, |
139 | Entry, HDTTMapPtr); |
140 | |
141 | /// RAII to establish tool anchors before and after data submit |
142 | OMPT_IF_BUILT( |
143 | InterfaceRAII TargetDataSubmitRAII( |
144 | RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(), |
145 | omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size, |
146 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
147 | |
148 | return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, |
149 | AsyncInfo); |
150 | } |
151 | |
152 | // Retrieve data from device |
153 | int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin, |
154 | int64_t Size, AsyncInfoTy &AsyncInfo, |
155 | HostDataToTargetTy *Entry, |
156 | MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr) { |
157 | if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) |
158 | MappingInfo.printCopyInfo(TgtPtrBegin, HstPtrBegin, Size, /*H2D=*/false, |
159 | Entry, HDTTMapPtr); |
160 | |
161 | /// RAII to establish tool anchors before and after data retrieval |
162 | OMPT_IF_BUILT( |
163 | InterfaceRAII TargetDataRetrieveRAII( |
164 | RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(), |
165 | DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size, |
166 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
167 | |
168 | return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, |
169 | AsyncInfo); |
170 | } |
171 | |
172 | // Copy data from current device to destination device directly |
173 | int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, |
174 | int64_t Size, AsyncInfoTy &AsyncInfo) { |
175 | /// RAII to establish tool anchors before and after data exchange |
176 | /// Note: Despite the fact that this is a data exchange, we use 'from_device' |
177 | /// operation enum (w.r.t. ompt_target_data_op_t) as there is currently |
178 | /// no better alternative. It is still possible to distinguish this |
179 | /// scenario from a real data retrieve by checking if both involved |
180 | /// device numbers are less than omp_get_num_devices(). |
181 | OMPT_IF_BUILT( |
182 | InterfaceRAII TargetDataExchangeRAII( |
183 | RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(), |
184 | RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size, |
185 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
186 | if (!AsyncInfo) { |
187 | return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, |
188 | Size); |
189 | } |
190 | return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, |
191 | DstPtr, Size, AsyncInfo); |
192 | } |
193 | |
194 | int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) { |
195 | DP("Notifying about new mapping: HstPtr=" DPxMOD ", Size=%" PRId64 "\n" , |
196 | DPxPTR(HstPtr), Size); |
197 | |
198 | if (RTL->data_notify_mapped(RTLDeviceID, HstPtr, Size)) { |
199 | REPORT("Notifying about data mapping failed.\n" ); |
200 | return OFFLOAD_FAIL; |
201 | } |
202 | return OFFLOAD_SUCCESS; |
203 | } |
204 | |
205 | int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) { |
206 | DP("Notifying about an unmapping: HstPtr=" DPxMOD "\n" , DPxPTR(HstPtr)); |
207 | |
208 | if (RTL->data_notify_unmapped(RTLDeviceID, HstPtr)) { |
209 | REPORT("Notifying about data unmapping failed.\n" ); |
210 | return OFFLOAD_FAIL; |
211 | } |
212 | return OFFLOAD_SUCCESS; |
213 | } |
214 | |
215 | // Run region on device |
216 | int32_t DeviceTy::launchKernel(void *TgtEntryPtr, void **TgtVarsPtr, |
217 | ptrdiff_t *TgtOffsets, KernelArgsTy &KernelArgs, |
218 | AsyncInfoTy &AsyncInfo) { |
219 | return RTL->launch_kernel(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets, |
220 | &KernelArgs, AsyncInfo); |
221 | } |
222 | |
223 | // Run region on device |
224 | bool DeviceTy::printDeviceInfo() { |
225 | RTL->print_device_info(RTLDeviceID); |
226 | return true; |
227 | } |
228 | |
229 | // Whether data can be copied to DstDevice directly |
230 | bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) { |
231 | if (RTL != DstDevice.RTL) |
232 | return false; |
233 | |
234 | if (RTL->is_data_exchangable(RTLDeviceID, DstDevice.RTLDeviceID)) |
235 | return true; |
236 | return false; |
237 | } |
238 | |
239 | int32_t DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) { |
240 | return RTL->synchronize(RTLDeviceID, AsyncInfo); |
241 | } |
242 | |
243 | int32_t DeviceTy::queryAsync(AsyncInfoTy &AsyncInfo) { |
244 | return RTL->query_async(RTLDeviceID, AsyncInfo); |
245 | } |
246 | |
247 | int32_t DeviceTy::createEvent(void **Event) { |
248 | return RTL->create_event(RTLDeviceID, Event); |
249 | } |
250 | |
251 | int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) { |
252 | return RTL->record_event(RTLDeviceID, Event, AsyncInfo); |
253 | } |
254 | |
255 | int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) { |
256 | return RTL->wait_event(RTLDeviceID, Event, AsyncInfo); |
257 | } |
258 | |
259 | int32_t DeviceTy::syncEvent(void *Event) { |
260 | return RTL->sync_event(RTLDeviceID, Event); |
261 | } |
262 | |
263 | int32_t DeviceTy::destroyEvent(void *Event) { |
264 | return RTL->destroy_event(RTLDeviceID, Event); |
265 | } |
266 | |
267 | void DeviceTy::dumpOffloadEntries() { |
268 | fprintf(stderr, "Device %i offload entries:\n" , DeviceID); |
269 | for (auto &It : *DeviceOffloadEntries.getExclusiveAccessor()) { |
270 | const char *Kind = "kernel" ; |
271 | if (It.second.isLink()) |
272 | Kind = "link" ; |
273 | else if (It.second.isGlobal()) |
274 | Kind = "global var." ; |
275 | fprintf(stderr, " %11s: %s\n" , Kind, It.second.getNameAsCStr()); |
276 | } |
277 | } |
278 | |
279 | bool DeviceTy::useAutoZeroCopy() { |
280 | if (PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) |
281 | return false; |
282 | return RTL->use_auto_zero_copy(RTLDeviceID); |
283 | } |
284 | |