1//===-- lib/cuda/memory.cpp -------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "flang/Runtime/CUDA/memory.h"
10#include "flang-rt/runtime/assign-impl.h"
11#include "flang-rt/runtime/descriptor.h"
12#include "flang-rt/runtime/environment.h"
13#include "flang-rt/runtime/terminator.h"
14#include "flang/Runtime/CUDA/common.h"
15#include "flang/Runtime/CUDA/descriptor.h"
16#include "flang/Runtime/CUDA/memmove-function.h"
17#include "flang/Runtime/assign.h"
18
19#include "cuda_runtime.h"
20
21namespace Fortran::runtime::cuda {
22
23extern "C" {
24
25void *RTDEF(CUFMemAlloc)(
26 std::size_t bytes, unsigned type, const char *sourceFile, int sourceLine) {
27 void *ptr = nullptr;
28 if (bytes != 0) {
29 if (type == kMemTypeDevice) {
30 if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
31 CUDA_REPORT_IF_ERROR(
32 cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal));
33 } else {
34 CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
35 }
36 } else if (type == kMemTypeManaged || type == kMemTypeUnified) {
37 CUDA_REPORT_IF_ERROR(
38 cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal));
39 } else if (type == kMemTypePinned) {
40 CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&ptr, bytes));
41 } else {
42 Terminator terminator{sourceFile, sourceLine};
43 terminator.Crash("unsupported memory type");
44 }
45 }
46 return ptr;
47}
48
49void RTDEF(CUFMemFree)(
50 void *ptr, unsigned type, const char *sourceFile, int sourceLine) {
51 if (!ptr)
52 return;
53 if (type == kMemTypeDevice || type == kMemTypeManaged ||
54 type == kMemTypeUnified) {
55 CUDA_REPORT_IF_ERROR(cudaFree(ptr));
56 } else if (type == kMemTypePinned) {
57 CUDA_REPORT_IF_ERROR(cudaFreeHost(ptr));
58 } else {
59 Terminator terminator{sourceFile, sourceLine};
60 terminator.Crash("unsupported memory type");
61 }
62}
63
64void RTDEF(CUFMemsetDescriptor)(
65 Descriptor *desc, void *value, const char *sourceFile, int sourceLine) {
66 Terminator terminator{sourceFile, sourceLine};
67 terminator.Crash("not yet implemented: CUDA data transfer from a scalar "
68 "value to a descriptor");
69}
70
71void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes,
72 unsigned mode, const char *sourceFile, int sourceLine) {
73 cudaMemcpyKind kind;
74 if (mode == kHostToDevice) {
75 kind = cudaMemcpyHostToDevice;
76 } else if (mode == kDeviceToHost) {
77 kind = cudaMemcpyDeviceToHost;
78 } else if (mode == kDeviceToDevice) {
79 kind = cudaMemcpyDeviceToDevice;
80 } else {
81 Terminator terminator{sourceFile, sourceLine};
82 terminator.Crash("host to host copy not supported");
83 }
84 // TODO: Use cudaMemcpyAsync when we have support for stream.
85 CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind));
86}
87
88void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc,
89 std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) {
90 Terminator terminator{sourceFile, sourceLine};
91 terminator.Crash(
92 "not yet implemented: CUDA data transfer from a descriptor to a pointer");
93}
94
95void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
96 unsigned mode, const char *sourceFile, int sourceLine) {
97 MemmoveFct memmoveFct;
98 Terminator terminator{sourceFile, sourceLine};
99 if (mode == kHostToDevice) {
100 memmoveFct = &MemmoveHostToDevice;
101 } else if (mode == kDeviceToHost) {
102 memmoveFct = &MemmoveDeviceToHost;
103 } else if (mode == kDeviceToDevice) {
104 memmoveFct = &MemmoveDeviceToDevice;
105 } else {
106 terminator.Crash("host to host copy not supported");
107 }
108 // Allocate dst descriptor if not allocated.
109 if (!dstDesc->IsAllocated()) {
110 dstDesc->ApplyMold(*srcDesc, dstDesc->rank());
111 dstDesc->Allocate(/*asyncObject=*/nullptr);
112 }
113 if ((srcDesc->rank() > 0) && (dstDesc->Elements() < srcDesc->Elements())) {
114 // Special case when rhs is bigger than lhs and both are contiguous arrays.
115 // In this case we do a simple ptr to ptr transfer with the size of lhs.
116 // This is be allowed in the reference compiler and it avoids error
117 // triggered in the Assign runtime function used for the main case below.
118 if (!srcDesc->IsContiguous() || !dstDesc->IsContiguous())
119 terminator.Crash("Unsupported data transfer: mismatching element counts "
120 "with non-contiguous arrays");
121 RTNAME(CUFDataTransferPtrPtr)(dstDesc->raw().base_addr,
122 srcDesc->raw().base_addr, dstDesc->Elements() * dstDesc->ElementBytes(),
123 mode, sourceFile, sourceLine);
124 } else {
125 Fortran::runtime::Assign(
126 *dstDesc, *srcDesc, terminator, MaybeReallocate, memmoveFct);
127 }
128}
129
130void RTDECL(CUFDataTransferCstDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
131 unsigned mode, const char *sourceFile, int sourceLine) {
132 MemmoveFct memmoveFct;
133 Terminator terminator{sourceFile, sourceLine};
134 if (mode == kHostToDevice) {
135 memmoveFct = &MemmoveHostToDevice;
136 } else if (mode == kDeviceToHost) {
137 memmoveFct = &MemmoveDeviceToHost;
138 } else if (mode == kDeviceToDevice) {
139 memmoveFct = &MemmoveDeviceToDevice;
140 } else {
141 terminator.Crash("host to host copy not supported");
142 }
143
144 Fortran::runtime::DoFromSourceAssign(
145 *dstDesc, *srcDesc, terminator, memmoveFct);
146}
147
148void RTDECL(CUFDataTransferDescDescNoRealloc)(Descriptor *dstDesc,
149 Descriptor *srcDesc, unsigned mode, const char *sourceFile,
150 int sourceLine) {
151 MemmoveFct memmoveFct;
152 Terminator terminator{sourceFile, sourceLine};
153 if (mode == kHostToDevice) {
154 memmoveFct = &MemmoveHostToDevice;
155 } else if (mode == kDeviceToHost) {
156 memmoveFct = &MemmoveDeviceToHost;
157 } else if (mode == kDeviceToDevice) {
158 memmoveFct = &MemmoveDeviceToDevice;
159 } else {
160 terminator.Crash("host to host copy not supported");
161 }
162 Fortran::runtime::Assign(
163 *dstDesc, *srcDesc, terminator, NoAssignFlags, memmoveFct);
164}
165
166void RTDECL(CUFDataTransferGlobalDescDesc)(Descriptor *dstDesc,
167 Descriptor *srcDesc, unsigned mode, const char *sourceFile,
168 int sourceLine) {
169 RTNAME(CUFDataTransferDescDesc)
170 (dstDesc, srcDesc, mode, sourceFile, sourceLine);
171 if ((mode == kHostToDevice) || (mode == kDeviceToDevice)) {
172 void *deviceAddr{
173 RTNAME(CUFGetDeviceAddress)((void *)dstDesc, sourceFile, sourceLine)};
174 RTNAME(CUFDescriptorSync)
175 ((Descriptor *)deviceAddr, dstDesc, sourceFile, sourceLine);
176 }
177}
178}
179} // namespace Fortran::runtime::cuda
180

source code of flang-rt/lib/cuda/memory.cpp