| 1 | //===--------- device.cpp - Target independent OpenMP target RTL ----------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Functionality for managing devices that are handled by RTL plugins. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "device.h" |
| 14 | #include "OffloadEntry.h" |
| 15 | #include "OpenMP/Mapping.h" |
| 16 | #include "OpenMP/OMPT/Callback.h" |
| 17 | #include "OpenMP/OMPT/Interface.h" |
| 18 | #include "PluginManager.h" |
| 19 | #include "Shared/APITypes.h" |
| 20 | #include "Shared/Debug.h" |
| 21 | #include "omptarget.h" |
| 22 | #include "private.h" |
| 23 | #include "rtl.h" |
| 24 | |
| 25 | #include "Shared/EnvironmentVar.h" |
| 26 | #include "llvm/Support/Error.h" |
| 27 | |
| 28 | #include <cassert> |
| 29 | #include <climits> |
| 30 | #include <cstdint> |
| 31 | #include <cstdio> |
| 32 | #include <mutex> |
| 33 | #include <string> |
| 34 | #include <thread> |
| 35 | |
| 36 | #ifdef OMPT_SUPPORT |
| 37 | using namespace llvm::omp::target::ompt; |
| 38 | #endif |
| 39 | |
| 40 | int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device, |
| 41 | AsyncInfoTy &AsyncInfo) const { |
| 42 | // First, check if the user disabled atomic map transfer/malloc/dealloc. |
| 43 | if (!MappingConfig::get().UseEventsForAtomicTransfers) |
| 44 | return OFFLOAD_SUCCESS; |
| 45 | |
| 46 | void *Event = getEvent(); |
| 47 | bool NeedNewEvent = Event == nullptr; |
| 48 | if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) { |
| 49 | REPORT("Failed to create event\n" ); |
| 50 | return OFFLOAD_FAIL; |
| 51 | } |
| 52 | |
| 53 | // We cannot assume the event should not be nullptr because we don't |
| 54 | // know if the target support event. But if a target doesn't, |
| 55 | // recordEvent should always return success. |
| 56 | if (Device.recordEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { |
| 57 | REPORT("Failed to set dependence on event " DPxMOD "\n" , DPxPTR(Event)); |
| 58 | return OFFLOAD_FAIL; |
| 59 | } |
| 60 | |
| 61 | if (NeedNewEvent) |
| 62 | setEvent(Event); |
| 63 | |
| 64 | return OFFLOAD_SUCCESS; |
| 65 | } |
| 66 | |
| 67 | DeviceTy::DeviceTy(GenericPluginTy *RTL, int32_t DeviceID, int32_t RTLDeviceID) |
| 68 | : DeviceID(DeviceID), RTL(RTL), RTLDeviceID(RTLDeviceID), |
| 69 | MappingInfo(*this) {} |
| 70 | |
| 71 | DeviceTy::~DeviceTy() { |
| 72 | if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)) |
| 73 | return; |
| 74 | |
| 75 | ident_t Loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;" }; |
| 76 | dumpTargetPointerMappings(&Loc, *this); |
| 77 | } |
| 78 | |
| 79 | llvm::Error DeviceTy::init() { |
| 80 | int32_t Ret = RTL->init_device(RTLDeviceID); |
| 81 | if (Ret != OFFLOAD_SUCCESS) |
| 82 | return error::createOffloadError(error::ErrorCode::BACKEND_FAILURE, |
| 83 | "failed to initialize device %d\n" , |
| 84 | DeviceID); |
| 85 | |
| 86 | // Enables recording kernels if set. |
| 87 | BoolEnvar OMPX_RecordKernel("LIBOMPTARGET_RECORD" , false); |
| 88 | if (OMPX_RecordKernel) { |
| 89 | // Enables saving the device memory kernel output post execution if set. |
| 90 | BoolEnvar OMPX_ReplaySaveOutput("LIBOMPTARGET_RR_SAVE_OUTPUT" , false); |
| 91 | |
| 92 | uint64_t ReqPtrArgOffset; |
| 93 | RTL->initialize_record_replay(RTLDeviceID, 0, nullptr, true, |
| 94 | OMPX_ReplaySaveOutput, ReqPtrArgOffset); |
| 95 | } |
| 96 | |
| 97 | return llvm::Error::success(); |
| 98 | } |
| 99 | |
| 100 | // Load binary to device. |
| 101 | llvm::Expected<__tgt_device_binary> |
| 102 | DeviceTy::loadBinary(__tgt_device_image *Img) { |
| 103 | __tgt_device_binary Binary; |
| 104 | |
| 105 | if (RTL->load_binary(RTLDeviceID, Img, &Binary) != OFFLOAD_SUCCESS) |
| 106 | return error::createOffloadError(error::ErrorCode::INVALID_BINARY, |
| 107 | "failed to load binary %p" , Img); |
| 108 | return Binary; |
| 109 | } |
| 110 | |
| 111 | void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) { |
| 112 | /// RAII to establish tool anchors before and after data allocation |
| 113 | void *TargetPtr = nullptr; |
| 114 | OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII( |
| 115 | RegionInterface.getCallbacks<ompt_target_data_alloc>(), |
| 116 | DeviceID, HstPtr, &TargetPtr, Size, |
| 117 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
| 118 | |
| 119 | TargetPtr = RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); |
| 120 | return TargetPtr; |
| 121 | } |
| 122 | |
| 123 | int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) { |
| 124 | /// RAII to establish tool anchors before and after data deletion |
| 125 | OMPT_IF_BUILT(InterfaceRAII TargetDataDeleteRAII( |
| 126 | RegionInterface.getCallbacks<ompt_target_data_delete>(), |
| 127 | DeviceID, TgtAllocBegin, |
| 128 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
| 129 | |
| 130 | return RTL->data_delete(RTLDeviceID, TgtAllocBegin, Kind); |
| 131 | } |
| 132 | |
| 133 | // Submit data to device |
| 134 | int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, |
| 135 | AsyncInfoTy &AsyncInfo, HostDataToTargetTy *Entry, |
| 136 | MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr) { |
| 137 | if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) |
| 138 | MappingInfo.printCopyInfo(TgtPtrBegin, HstPtrBegin, Size, /*H2D=*/true, |
| 139 | Entry, HDTTMapPtr); |
| 140 | |
| 141 | /// RAII to establish tool anchors before and after data submit |
| 142 | OMPT_IF_BUILT( |
| 143 | InterfaceRAII TargetDataSubmitRAII( |
| 144 | RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(), |
| 145 | omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size, |
| 146 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
| 147 | |
| 148 | return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, |
| 149 | AsyncInfo); |
| 150 | } |
| 151 | |
| 152 | // Retrieve data from device |
| 153 | int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin, |
| 154 | int64_t Size, AsyncInfoTy &AsyncInfo, |
| 155 | HostDataToTargetTy *Entry, |
| 156 | MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr) { |
| 157 | if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) |
| 158 | MappingInfo.printCopyInfo(TgtPtrBegin, HstPtrBegin, Size, /*H2D=*/false, |
| 159 | Entry, HDTTMapPtr); |
| 160 | |
| 161 | /// RAII to establish tool anchors before and after data retrieval |
| 162 | OMPT_IF_BUILT( |
| 163 | InterfaceRAII TargetDataRetrieveRAII( |
| 164 | RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(), |
| 165 | DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size, |
| 166 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
| 167 | |
| 168 | return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, |
| 169 | AsyncInfo); |
| 170 | } |
| 171 | |
| 172 | // Copy data from current device to destination device directly |
| 173 | int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, |
| 174 | int64_t Size, AsyncInfoTy &AsyncInfo) { |
| 175 | /// RAII to establish tool anchors before and after data exchange |
| 176 | /// Note: Despite the fact that this is a data exchange, we use 'from_device' |
| 177 | /// operation enum (w.r.t. ompt_target_data_op_t) as there is currently |
| 178 | /// no better alternative. It is still possible to distinguish this |
| 179 | /// scenario from a real data retrieve by checking if both involved |
| 180 | /// device numbers are less than omp_get_num_devices(). |
| 181 | OMPT_IF_BUILT( |
| 182 | InterfaceRAII TargetDataExchangeRAII( |
| 183 | RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(), |
| 184 | RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size, |
| 185 | /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) |
| 186 | if (!AsyncInfo) { |
| 187 | return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, |
| 188 | Size); |
| 189 | } |
| 190 | return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, |
| 191 | DstPtr, Size, AsyncInfo); |
| 192 | } |
| 193 | |
| 194 | int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) { |
| 195 | DP("Notifying about new mapping: HstPtr=" DPxMOD ", Size=%" PRId64 "\n" , |
| 196 | DPxPTR(HstPtr), Size); |
| 197 | |
| 198 | if (RTL->data_notify_mapped(RTLDeviceID, HstPtr, Size)) { |
| 199 | REPORT("Notifying about data mapping failed.\n" ); |
| 200 | return OFFLOAD_FAIL; |
| 201 | } |
| 202 | return OFFLOAD_SUCCESS; |
| 203 | } |
| 204 | |
| 205 | int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) { |
| 206 | DP("Notifying about an unmapping: HstPtr=" DPxMOD "\n" , DPxPTR(HstPtr)); |
| 207 | |
| 208 | if (RTL->data_notify_unmapped(RTLDeviceID, HstPtr)) { |
| 209 | REPORT("Notifying about data unmapping failed.\n" ); |
| 210 | return OFFLOAD_FAIL; |
| 211 | } |
| 212 | return OFFLOAD_SUCCESS; |
| 213 | } |
| 214 | |
| 215 | // Run region on device |
| 216 | int32_t DeviceTy::launchKernel(void *TgtEntryPtr, void **TgtVarsPtr, |
| 217 | ptrdiff_t *TgtOffsets, KernelArgsTy &KernelArgs, |
| 218 | AsyncInfoTy &AsyncInfo) { |
| 219 | return RTL->launch_kernel(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets, |
| 220 | &KernelArgs, AsyncInfo); |
| 221 | } |
| 222 | |
| 223 | // Run region on device |
| 224 | bool DeviceTy::printDeviceInfo() { |
| 225 | RTL->print_device_info(RTLDeviceID); |
| 226 | return true; |
| 227 | } |
| 228 | |
| 229 | // Whether data can be copied to DstDevice directly |
| 230 | bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) { |
| 231 | if (RTL != DstDevice.RTL) |
| 232 | return false; |
| 233 | |
| 234 | if (RTL->is_data_exchangable(RTLDeviceID, DstDevice.RTLDeviceID)) |
| 235 | return true; |
| 236 | return false; |
| 237 | } |
| 238 | |
| 239 | int32_t DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) { |
| 240 | return RTL->synchronize(RTLDeviceID, AsyncInfo); |
| 241 | } |
| 242 | |
| 243 | int32_t DeviceTy::queryAsync(AsyncInfoTy &AsyncInfo) { |
| 244 | return RTL->query_async(RTLDeviceID, AsyncInfo); |
| 245 | } |
| 246 | |
| 247 | int32_t DeviceTy::createEvent(void **Event) { |
| 248 | return RTL->create_event(RTLDeviceID, Event); |
| 249 | } |
| 250 | |
| 251 | int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) { |
| 252 | return RTL->record_event(RTLDeviceID, Event, AsyncInfo); |
| 253 | } |
| 254 | |
| 255 | int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) { |
| 256 | return RTL->wait_event(RTLDeviceID, Event, AsyncInfo); |
| 257 | } |
| 258 | |
| 259 | int32_t DeviceTy::syncEvent(void *Event) { |
| 260 | return RTL->sync_event(RTLDeviceID, Event); |
| 261 | } |
| 262 | |
| 263 | int32_t DeviceTy::destroyEvent(void *Event) { |
| 264 | return RTL->destroy_event(RTLDeviceID, Event); |
| 265 | } |
| 266 | |
| 267 | void DeviceTy::dumpOffloadEntries() { |
| 268 | fprintf(stderr, "Device %i offload entries:\n" , DeviceID); |
| 269 | for (auto &It : *DeviceOffloadEntries.getExclusiveAccessor()) { |
| 270 | const char *Kind = "kernel" ; |
| 271 | if (It.second.isLink()) |
| 272 | Kind = "link" ; |
| 273 | else if (It.second.isGlobal()) |
| 274 | Kind = "global var." ; |
| 275 | fprintf(stderr, " %11s: %s\n" , Kind, It.second.getNameAsCStr()); |
| 276 | } |
| 277 | } |
| 278 | |
| 279 | bool DeviceTy::useAutoZeroCopy() { |
| 280 | if (PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) |
| 281 | return false; |
| 282 | return RTL->use_auto_zero_copy(RTLDeviceID); |
| 283 | } |
| 284 | |