1 | //===-- lib/cuda/memory.cpp -------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "flang/Runtime/CUDA/memory.h" |
10 | #include "flang-rt/runtime/assign-impl.h" |
11 | #include "flang-rt/runtime/descriptor.h" |
12 | #include "flang-rt/runtime/environment.h" |
13 | #include "flang-rt/runtime/terminator.h" |
14 | #include "flang/Runtime/CUDA/common.h" |
15 | #include "flang/Runtime/CUDA/descriptor.h" |
16 | #include "flang/Runtime/CUDA/memmove-function.h" |
17 | #include "flang/Runtime/assign.h" |
18 | |
19 | #include "cuda_runtime.h" |
20 | |
21 | namespace Fortran::runtime::cuda { |
22 | |
23 | extern "C" { |
24 | |
25 | void *RTDEF(CUFMemAlloc)( |
26 | std::size_t bytes, unsigned type, const char *sourceFile, int sourceLine) { |
27 | void *ptr = nullptr; |
28 | if (bytes != 0) { |
29 | if (type == kMemTypeDevice) { |
30 | if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) { |
31 | CUDA_REPORT_IF_ERROR( |
32 | cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal)); |
33 | } else { |
34 | CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes)); |
35 | } |
36 | } else if (type == kMemTypeManaged || type == kMemTypeUnified) { |
37 | CUDA_REPORT_IF_ERROR( |
38 | cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal)); |
39 | } else if (type == kMemTypePinned) { |
40 | CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&ptr, bytes)); |
41 | } else { |
42 | Terminator terminator{sourceFile, sourceLine}; |
43 | terminator.Crash("unsupported memory type" ); |
44 | } |
45 | } |
46 | return ptr; |
47 | } |
48 | |
49 | void RTDEF(CUFMemFree)( |
50 | void *ptr, unsigned type, const char *sourceFile, int sourceLine) { |
51 | if (!ptr) |
52 | return; |
53 | if (type == kMemTypeDevice || type == kMemTypeManaged || |
54 | type == kMemTypeUnified) { |
55 | CUDA_REPORT_IF_ERROR(cudaFree(ptr)); |
56 | } else if (type == kMemTypePinned) { |
57 | CUDA_REPORT_IF_ERROR(cudaFreeHost(ptr)); |
58 | } else { |
59 | Terminator terminator{sourceFile, sourceLine}; |
60 | terminator.Crash("unsupported memory type" ); |
61 | } |
62 | } |
63 | |
64 | void RTDEF(CUFMemsetDescriptor)( |
65 | Descriptor *desc, void *value, const char *sourceFile, int sourceLine) { |
66 | Terminator terminator{sourceFile, sourceLine}; |
67 | terminator.Crash("not yet implemented: CUDA data transfer from a scalar " |
68 | "value to a descriptor" ); |
69 | } |
70 | |
71 | void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, |
72 | unsigned mode, const char *sourceFile, int sourceLine) { |
73 | cudaMemcpyKind kind; |
74 | if (mode == kHostToDevice) { |
75 | kind = cudaMemcpyHostToDevice; |
76 | } else if (mode == kDeviceToHost) { |
77 | kind = cudaMemcpyDeviceToHost; |
78 | } else if (mode == kDeviceToDevice) { |
79 | kind = cudaMemcpyDeviceToDevice; |
80 | } else { |
81 | Terminator terminator{sourceFile, sourceLine}; |
82 | terminator.Crash("host to host copy not supported" ); |
83 | } |
84 | // TODO: Use cudaMemcpyAsync when we have support for stream. |
85 | CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind)); |
86 | } |
87 | |
88 | void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc, |
89 | std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) { |
90 | Terminator terminator{sourceFile, sourceLine}; |
91 | terminator.Crash( |
92 | "not yet implemented: CUDA data transfer from a descriptor to a pointer" ); |
93 | } |
94 | |
95 | void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc, |
96 | unsigned mode, const char *sourceFile, int sourceLine) { |
97 | MemmoveFct memmoveFct; |
98 | Terminator terminator{sourceFile, sourceLine}; |
99 | if (mode == kHostToDevice) { |
100 | memmoveFct = &MemmoveHostToDevice; |
101 | } else if (mode == kDeviceToHost) { |
102 | memmoveFct = &MemmoveDeviceToHost; |
103 | } else if (mode == kDeviceToDevice) { |
104 | memmoveFct = &MemmoveDeviceToDevice; |
105 | } else { |
106 | terminator.Crash("host to host copy not supported" ); |
107 | } |
108 | // Allocate dst descriptor if not allocated. |
109 | if (!dstDesc->IsAllocated()) { |
110 | dstDesc->ApplyMold(*srcDesc, dstDesc->rank()); |
111 | dstDesc->Allocate(/*asyncObject=*/nullptr); |
112 | } |
113 | if ((srcDesc->rank() > 0) && (dstDesc->Elements() < srcDesc->Elements())) { |
114 | // Special case when rhs is bigger than lhs and both are contiguous arrays. |
115 | // In this case we do a simple ptr to ptr transfer with the size of lhs. |
116 | // This is be allowed in the reference compiler and it avoids error |
117 | // triggered in the Assign runtime function used for the main case below. |
118 | if (!srcDesc->IsContiguous() || !dstDesc->IsContiguous()) |
119 | terminator.Crash("Unsupported data transfer: mismatching element counts " |
120 | "with non-contiguous arrays" ); |
121 | RTNAME(CUFDataTransferPtrPtr)(dstDesc->raw().base_addr, |
122 | srcDesc->raw().base_addr, dstDesc->Elements() * dstDesc->ElementBytes(), |
123 | mode, sourceFile, sourceLine); |
124 | } else { |
125 | Fortran::runtime::Assign( |
126 | *dstDesc, *srcDesc, terminator, MaybeReallocate, memmoveFct); |
127 | } |
128 | } |
129 | |
130 | void RTDECL(CUFDataTransferCstDesc)(Descriptor *dstDesc, Descriptor *srcDesc, |
131 | unsigned mode, const char *sourceFile, int sourceLine) { |
132 | MemmoveFct memmoveFct; |
133 | Terminator terminator{sourceFile, sourceLine}; |
134 | if (mode == kHostToDevice) { |
135 | memmoveFct = &MemmoveHostToDevice; |
136 | } else if (mode == kDeviceToHost) { |
137 | memmoveFct = &MemmoveDeviceToHost; |
138 | } else if (mode == kDeviceToDevice) { |
139 | memmoveFct = &MemmoveDeviceToDevice; |
140 | } else { |
141 | terminator.Crash("host to host copy not supported" ); |
142 | } |
143 | |
144 | Fortran::runtime::DoFromSourceAssign( |
145 | *dstDesc, *srcDesc, terminator, memmoveFct); |
146 | } |
147 | |
148 | void RTDECL(CUFDataTransferDescDescNoRealloc)(Descriptor *dstDesc, |
149 | Descriptor *srcDesc, unsigned mode, const char *sourceFile, |
150 | int sourceLine) { |
151 | MemmoveFct memmoveFct; |
152 | Terminator terminator{sourceFile, sourceLine}; |
153 | if (mode == kHostToDevice) { |
154 | memmoveFct = &MemmoveHostToDevice; |
155 | } else if (mode == kDeviceToHost) { |
156 | memmoveFct = &MemmoveDeviceToHost; |
157 | } else if (mode == kDeviceToDevice) { |
158 | memmoveFct = &MemmoveDeviceToDevice; |
159 | } else { |
160 | terminator.Crash("host to host copy not supported" ); |
161 | } |
162 | Fortran::runtime::Assign( |
163 | *dstDesc, *srcDesc, terminator, NoAssignFlags, memmoveFct); |
164 | } |
165 | |
166 | void RTDECL(CUFDataTransferGlobalDescDesc)(Descriptor *dstDesc, |
167 | Descriptor *srcDesc, unsigned mode, const char *sourceFile, |
168 | int sourceLine) { |
169 | RTNAME(CUFDataTransferDescDesc) |
170 | (dstDesc, srcDesc, mode, sourceFile, sourceLine); |
171 | if ((mode == kHostToDevice) || (mode == kDeviceToDevice)) { |
172 | void *deviceAddr{ |
173 | RTNAME(CUFGetDeviceAddress)((void *)dstDesc, sourceFile, sourceLine)}; |
174 | RTNAME(CUFDescriptorSync) |
175 | ((Descriptor *)deviceAddr, dstDesc, sourceFile, sourceLine); |
176 | } |
177 | } |
178 | } |
179 | } // namespace Fortran::runtime::cuda |
180 | |