1 | //===-- lib/cuda/allocatable.cpp --------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "flang/Runtime/CUDA/allocatable.h" |
10 | #include "flang-rt/runtime/assign-impl.h" |
11 | #include "flang-rt/runtime/descriptor.h" |
12 | #include "flang-rt/runtime/stat.h" |
13 | #include "flang-rt/runtime/terminator.h" |
14 | #include "flang/Runtime/CUDA/common.h" |
15 | #include "flang/Runtime/CUDA/descriptor.h" |
16 | #include "flang/Runtime/CUDA/memmove-function.h" |
17 | #include "flang/Runtime/allocatable.h" |
18 | |
19 | #include "cuda_runtime.h" |
20 | |
21 | namespace Fortran::runtime::cuda { |
22 | |
23 | extern "C" { |
24 | RT_EXT_API_GROUP_BEGIN |
25 | |
26 | int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t *stream, |
27 | bool *pinned, bool hasStat, const Descriptor *errMsg, |
28 | const char *sourceFile, int sourceLine) { |
29 | int stat{RTNAME(CUFAllocatableAllocate)( |
30 | desc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; |
31 | #ifndef RT_DEVICE_COMPILATION |
32 | // Descriptor synchronization is only done when the allocation is done |
33 | // from the host. |
34 | if (stat == StatOk) { |
35 | void *deviceAddr{ |
36 | RTNAME(CUFGetDeviceAddress)((void *)&desc, sourceFile, sourceLine)}; |
37 | RTNAME(CUFDescriptorSync) |
38 | ((Descriptor *)deviceAddr, &desc, sourceFile, sourceLine); |
39 | } |
40 | #endif |
41 | return stat; |
42 | } |
43 | |
44 | int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t *stream, |
45 | bool *pinned, bool hasStat, const Descriptor *errMsg, |
46 | const char *sourceFile, int sourceLine) { |
47 | if (desc.HasAddendum()) { |
48 | Terminator terminator{sourceFile, sourceLine}; |
49 | // TODO: This require a bit more work to set the correct type descriptor |
50 | // address |
51 | terminator.Crash( |
52 | "not yet implemented: CUDA descriptor allocation with addendum" ); |
53 | } |
54 | // Perform the standard allocation. |
55 | int stat{RTNAME(AllocatableAllocate)( |
56 | desc, stream, hasStat, errMsg, sourceFile, sourceLine)}; |
57 | if (pinned) { |
58 | // Set pinned according to stat. More infrastructre is needed to set it |
59 | // closer to the actual allocation call. |
60 | *pinned = (stat == StatOk); |
61 | } |
62 | return stat; |
63 | } |
64 | |
65 | int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc, |
66 | const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat, |
67 | const Descriptor *errMsg, const char *sourceFile, int sourceLine) { |
68 | int stat{RTNAME(CUFAllocatableAllocate)( |
69 | alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; |
70 | if (stat == StatOk) { |
71 | Terminator terminator{sourceFile, sourceLine}; |
72 | Fortran::runtime::DoFromSourceAssign( |
73 | alloc, source, terminator, &MemmoveHostToDevice); |
74 | } |
75 | return stat; |
76 | } |
77 | |
78 | int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc, |
79 | const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat, |
80 | const Descriptor *errMsg, const char *sourceFile, int sourceLine) { |
81 | int stat{RTNAME(CUFAllocatableAllocateSync)( |
82 | alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; |
83 | if (stat == StatOk) { |
84 | Terminator terminator{sourceFile, sourceLine}; |
85 | Fortran::runtime::DoFromSourceAssign( |
86 | alloc, source, terminator, &MemmoveHostToDevice); |
87 | } |
88 | return stat; |
89 | } |
90 | |
91 | int RTDEF(CUFAllocatableDeallocate)(Descriptor &desc, bool hasStat, |
92 | const Descriptor *errMsg, const char *sourceFile, int sourceLine) { |
93 | // Perform the standard allocation. |
94 | int stat{RTNAME(AllocatableDeallocate)( |
95 | desc, hasStat, errMsg, sourceFile, sourceLine)}; |
96 | #ifndef RT_DEVICE_COMPILATION |
97 | // Descriptor synchronization is only done when the deallocation is done |
98 | // from the host. |
99 | if (stat == StatOk) { |
100 | void *deviceAddr{ |
101 | RTNAME(CUFGetDeviceAddress)((void *)&desc, sourceFile, sourceLine)}; |
102 | RTNAME(CUFDescriptorSync) |
103 | ((Descriptor *)deviceAddr, &desc, sourceFile, sourceLine); |
104 | } |
105 | #endif |
106 | return stat; |
107 | } |
108 | |
109 | RT_EXT_API_GROUP_END |
110 | |
111 | } // extern "C" |
112 | |
113 | } // namespace Fortran::runtime::cuda |
114 | |