| 1 | //===-- lib/cuda/memory.cpp -------------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "flang/Runtime/CUDA/memory.h" |
| 10 | #include "flang-rt/runtime/assign-impl.h" |
| 11 | #include "flang-rt/runtime/descriptor.h" |
| 12 | #include "flang-rt/runtime/environment.h" |
| 13 | #include "flang-rt/runtime/terminator.h" |
| 14 | #include "flang/Runtime/CUDA/common.h" |
| 15 | #include "flang/Runtime/CUDA/descriptor.h" |
| 16 | #include "flang/Runtime/CUDA/memmove-function.h" |
| 17 | #include "flang/Runtime/assign.h" |
| 18 | |
| 19 | #include "cuda_runtime.h" |
| 20 | |
| 21 | namespace Fortran::runtime::cuda { |
| 22 | |
| 23 | extern "C" { |
| 24 | |
| 25 | void *RTDEF(CUFMemAlloc)( |
| 26 | std::size_t bytes, unsigned type, const char *sourceFile, int sourceLine) { |
| 27 | void *ptr = nullptr; |
| 28 | if (bytes != 0) { |
| 29 | if (type == kMemTypeDevice) { |
| 30 | if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) { |
| 31 | CUDA_REPORT_IF_ERROR( |
| 32 | cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal)); |
| 33 | } else { |
| 34 | CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes)); |
| 35 | } |
| 36 | } else if (type == kMemTypeManaged || type == kMemTypeUnified) { |
| 37 | CUDA_REPORT_IF_ERROR( |
| 38 | cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal)); |
| 39 | } else if (type == kMemTypePinned) { |
| 40 | CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&ptr, bytes)); |
| 41 | } else { |
| 42 | Terminator terminator{sourceFile, sourceLine}; |
| 43 | terminator.Crash("unsupported memory type" ); |
| 44 | } |
| 45 | } |
| 46 | return ptr; |
| 47 | } |
| 48 | |
| 49 | void RTDEF(CUFMemFree)( |
| 50 | void *ptr, unsigned type, const char *sourceFile, int sourceLine) { |
| 51 | if (!ptr) |
| 52 | return; |
| 53 | if (type == kMemTypeDevice || type == kMemTypeManaged || |
| 54 | type == kMemTypeUnified) { |
| 55 | CUDA_REPORT_IF_ERROR(cudaFree(ptr)); |
| 56 | } else if (type == kMemTypePinned) { |
| 57 | CUDA_REPORT_IF_ERROR(cudaFreeHost(ptr)); |
| 58 | } else { |
| 59 | Terminator terminator{sourceFile, sourceLine}; |
| 60 | terminator.Crash("unsupported memory type" ); |
| 61 | } |
| 62 | } |
| 63 | |
// Fill the data described by `desc` with the scalar pointed to by `value`.
// Not yet implemented: always aborts with a source-located crash.
void RTDEF(CUFMemsetDescriptor)(
    Descriptor *desc, void *value, const char *sourceFile, int sourceLine) {
  Terminator terminator{sourceFile, sourceLine};
  terminator.Crash("not yet implemented: CUDA data transfer from a scalar "
      "value to a descriptor" );
}
| 70 | |
| 71 | void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, |
| 72 | unsigned mode, const char *sourceFile, int sourceLine) { |
| 73 | cudaMemcpyKind kind; |
| 74 | if (mode == kHostToDevice) { |
| 75 | kind = cudaMemcpyHostToDevice; |
| 76 | } else if (mode == kDeviceToHost) { |
| 77 | kind = cudaMemcpyDeviceToHost; |
| 78 | } else if (mode == kDeviceToDevice) { |
| 79 | kind = cudaMemcpyDeviceToDevice; |
| 80 | } else { |
| 81 | Terminator terminator{sourceFile, sourceLine}; |
| 82 | terminator.Crash("host to host copy not supported" ); |
| 83 | } |
| 84 | // TODO: Use cudaMemcpyAsync when we have support for stream. |
| 85 | CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind)); |
| 86 | } |
| 87 | |
// Transfer the data described by `desc` to the raw address `addr`.
// Not yet implemented: always aborts with a source-located crash.
void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc,
    std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) {
  Terminator terminator{sourceFile, sourceLine};
  terminator.Crash(
      "not yet implemented: CUDA data transfer from a descriptor to a pointer" );
}
| 94 | |
| 95 | void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc, |
| 96 | unsigned mode, const char *sourceFile, int sourceLine) { |
| 97 | MemmoveFct memmoveFct; |
| 98 | Terminator terminator{sourceFile, sourceLine}; |
| 99 | if (mode == kHostToDevice) { |
| 100 | memmoveFct = &MemmoveHostToDevice; |
| 101 | } else if (mode == kDeviceToHost) { |
| 102 | memmoveFct = &MemmoveDeviceToHost; |
| 103 | } else if (mode == kDeviceToDevice) { |
| 104 | memmoveFct = &MemmoveDeviceToDevice; |
| 105 | } else { |
| 106 | terminator.Crash("host to host copy not supported" ); |
| 107 | } |
| 108 | // Allocate dst descriptor if not allocated. |
| 109 | if (!dstDesc->IsAllocated()) { |
| 110 | dstDesc->ApplyMold(*srcDesc, dstDesc->rank()); |
| 111 | dstDesc->Allocate(/*asyncObject=*/nullptr); |
| 112 | } |
| 113 | if ((srcDesc->rank() > 0) && (dstDesc->Elements() < srcDesc->Elements())) { |
| 114 | // Special case when rhs is bigger than lhs and both are contiguous arrays. |
| 115 | // In this case we do a simple ptr to ptr transfer with the size of lhs. |
| 116 | // This is be allowed in the reference compiler and it avoids error |
| 117 | // triggered in the Assign runtime function used for the main case below. |
| 118 | if (!srcDesc->IsContiguous() || !dstDesc->IsContiguous()) |
| 119 | terminator.Crash("Unsupported data transfer: mismatching element counts " |
| 120 | "with non-contiguous arrays" ); |
| 121 | RTNAME(CUFDataTransferPtrPtr)(dstDesc->raw().base_addr, |
| 122 | srcDesc->raw().base_addr, dstDesc->Elements() * dstDesc->ElementBytes(), |
| 123 | mode, sourceFile, sourceLine); |
| 124 | } else { |
| 125 | Fortran::runtime::Assign( |
| 126 | *dstDesc, *srcDesc, terminator, MaybeReallocate, memmoveFct); |
| 127 | } |
| 128 | } |
| 129 | |
| 130 | void RTDECL(CUFDataTransferCstDesc)(Descriptor *dstDesc, Descriptor *srcDesc, |
| 131 | unsigned mode, const char *sourceFile, int sourceLine) { |
| 132 | MemmoveFct memmoveFct; |
| 133 | Terminator terminator{sourceFile, sourceLine}; |
| 134 | if (mode == kHostToDevice) { |
| 135 | memmoveFct = &MemmoveHostToDevice; |
| 136 | } else if (mode == kDeviceToHost) { |
| 137 | memmoveFct = &MemmoveDeviceToHost; |
| 138 | } else if (mode == kDeviceToDevice) { |
| 139 | memmoveFct = &MemmoveDeviceToDevice; |
| 140 | } else { |
| 141 | terminator.Crash("host to host copy not supported" ); |
| 142 | } |
| 143 | |
| 144 | Fortran::runtime::DoFromSourceAssign( |
| 145 | *dstDesc, *srcDesc, terminator, memmoveFct); |
| 146 | } |
| 147 | |
| 148 | void RTDECL(CUFDataTransferDescDescNoRealloc)(Descriptor *dstDesc, |
| 149 | Descriptor *srcDesc, unsigned mode, const char *sourceFile, |
| 150 | int sourceLine) { |
| 151 | MemmoveFct memmoveFct; |
| 152 | Terminator terminator{sourceFile, sourceLine}; |
| 153 | if (mode == kHostToDevice) { |
| 154 | memmoveFct = &MemmoveHostToDevice; |
| 155 | } else if (mode == kDeviceToHost) { |
| 156 | memmoveFct = &MemmoveDeviceToHost; |
| 157 | } else if (mode == kDeviceToDevice) { |
| 158 | memmoveFct = &MemmoveDeviceToDevice; |
| 159 | } else { |
| 160 | terminator.Crash("host to host copy not supported" ); |
| 161 | } |
| 162 | Fortran::runtime::Assign( |
| 163 | *dstDesc, *srcDesc, terminator, NoAssignFlags, memmoveFct); |
| 164 | } |
| 165 | |
| 166 | void RTDECL(CUFDataTransferGlobalDescDesc)(Descriptor *dstDesc, |
| 167 | Descriptor *srcDesc, unsigned mode, const char *sourceFile, |
| 168 | int sourceLine) { |
| 169 | RTNAME(CUFDataTransferDescDesc) |
| 170 | (dstDesc, srcDesc, mode, sourceFile, sourceLine); |
| 171 | if ((mode == kHostToDevice) || (mode == kDeviceToDevice)) { |
| 172 | void *deviceAddr{ |
| 173 | RTNAME(CUFGetDeviceAddress)((void *)dstDesc, sourceFile, sourceLine)}; |
| 174 | RTNAME(CUFDescriptorSync) |
| 175 | ((Descriptor *)deviceAddr, dstDesc, sourceFile, sourceLine); |
| 176 | } |
| 177 | } |
| 178 | } |
| 179 | } // namespace Fortran::runtime::cuda |
| 180 | |