1 | //===-- Generic device loader interface -----------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H |
10 | #define LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H |
11 | |
12 | #include "utils/gpu/server/llvmlibc_rpc_server.h" |
13 | |
14 | #include "llvm-libc-types/rpc_opcodes_t.h" |
15 | #include "include/llvm-libc-types/test_rpc_opcodes_t.h" |
16 | |
17 | #include <cstddef> |
18 | #include <cstdint> |
19 | #include <cstdio> |
20 | #include <cstdlib> |
21 | #include <cstring> |
22 | |
/// Generic launch parameters for configuring the number of blocks and threads
/// used when launching a kernel.
struct LaunchParameters {
  uint32_t num_threads_x; ///< Thread count in the x dimension.
  uint32_t num_threads_y; ///< Thread count in the y dimension.
  uint32_t num_threads_z; ///< Thread count in the z dimension.
  uint32_t num_blocks_x;  ///< Block count in the x dimension.
  uint32_t num_blocks_y;  ///< Block count in the y dimension.
  uint32_t num_blocks_z;  ///< Block count in the z dimension.
};
32 | |
/// Argument bundle handed to the '_begin' kernel.
///
/// The member order is part of the layout shared with the kernel, so it must
/// not be changed.
struct begin_args_t {
  int argc;   ///< Argument count.
  void *argv; ///< Opaque pointer to the argument vector (presumably the
              ///< buffer built by copy_argument_vector -- confirm at caller).
  void *envp; ///< Opaque pointer to the environment vector (presumably the
              ///< buffer built by copy_environment -- confirm at caller).
};
39 | |
/// Argument bundle handed to the '_start' kernel.
///
/// The member order is part of the layout shared with the kernel, so it must
/// not be changed.
struct start_args_t {
  int argc;   ///< Argument count.
  void *argv; ///< Opaque pointer to the argument vector.
  void *envp; ///< Opaque pointer to the environment vector.
  void *ret;  ///< Opaque pointer supplied alongside the arguments (presumably
              ///< where the kernel stores the result of `main` -- confirm).
};
47 | |
/// Argument bundle handed to the '_end' kernel.
struct end_args_t {
  int argc; ///< Single integer argument passed to the kernel.
};
52 | |
53 | /// Generic interface to load the \p image and launch execution of the _start |
54 | /// kernel on the target device. Copies \p argc and \p argv to the device. |
55 | /// Returns the final value of the `main` function on the device. |
56 | int load(int argc, char **argv, char **evnp, void *image, size_t size, |
57 | const LaunchParameters ¶ms); |
58 | |
/// Round \p val up to the nearest multiple of \p align.
///
/// \p align is converted to the value type \c V before use. The result is
/// computed with integer division, so \p align must be non-zero.
template <typename V, typename A> inline V align_up(V val, A align) {
  const V unit = V(align);
  // Bias by (unit - 1) so the truncating division below rounds upwards.
  const auto biased = val + unit - 1;
  return (biased / unit) * unit;
}
63 | |
/// Copy the system's argument vector to GPU memory allocated using \p alloc.
///
/// A single buffer is requested from \p alloc and laid out as:
///   [ argc + 1 pointers, the last one null ][ the strings, back to back ]
/// Each pointer slot refers to the corresponding string inside that same
/// buffer. The buffer is filled directly from the host with memcpy, so the
/// memory returned by \p alloc must be writable from the host.
///
/// \param argc  Number of strings in \p argv.
/// \param argv  Array of at least \p argc null-terminated strings.
/// \param alloc Callable taking a byte count and returning a pointer to that
///              much memory, or null on failure.
/// \returns the start of the buffer (the pointer table), or nullptr if
///          \p alloc returned null.
template <typename Allocator>
void *copy_argument_vector(int argc, char **argv, Allocator alloc) {
  // Space for the pointer table, including its terminating null entry.
  const size_t argv_size = sizeof(char *) * (static_cast<size_t>(argc) + 1);

  // Space for every string, including each null terminator.
  size_t str_size = 0;
  for (int i = 0; i < argc; ++i)
    str_size += std::strlen(argv[i]) + 1;

  // We allocate enough space for a null terminated array and all the strings.
  void *dev_argv = alloc(argv_size + str_size);
  if (!dev_argv)
    return nullptr;

  // Store the strings linearly in the same memory buffer, right after the
  // pointer table.
  void **dev_ptrs = static_cast<void **>(dev_argv);
  uint8_t *dev_str = static_cast<uint8_t *>(dev_argv) + argv_size;
  for (int i = 0; i < argc; ++i) {
    const size_t size = std::strlen(argv[i]) + 1;
    std::memcpy(dev_str, argv[i], size);
    dev_ptrs[i] = dev_str;
    dev_str += size;
  }

  // Ensure the vector is null terminated. The terminator lives in slot
  // `argc`; indexing by the table's byte size would write far past the end
  // of the allocation.
  dev_ptrs[argc] = nullptr;
  return dev_argv;
}
90 | |
/// Copy the system's environment to GPU memory allocated using \p alloc.
///
/// \p envp must be a null-terminated array of strings. Its entries are
/// counted and the copy itself is delegated to copy_argument_vector, whose
/// result is returned unchanged.
template <typename Allocator>
void *copy_environment(char **envp, Allocator alloc) {
  // Count entries up to (not including) the terminating null pointer.
  int envc = 0;
  while (envp[envc] != 0)
    ++envc;

  return copy_argument_vector(envc, envp, alloc);
}
100 | |
/// Print \p msg followed by a newline to stderr, then terminate the process
/// with EXIT_FAILURE. This function does not return.
inline void handle_error(const char *msg) {
  std::fprintf(stderr, "%s\n", msg);
  std::exit(EXIT_FAILURE);
}
105 | |
106 | inline void handle_error(rpc_status_t) { |
107 | handle_error(msg: "Failure in the RPC server\n" ); |
108 | } |
109 | |
110 | template <uint32_t lane_size> |
111 | inline void register_rpc_callbacks(rpc_device_t device) { |
112 | static_assert(lane_size == 32 || lane_size == 64, "Invalid Lane size" ); |
113 | // Register the ping test for the `libc` tests. |
114 | rpc_register_callback( |
115 | device, static_cast<rpc_opcode_t>(RPC_TEST_INCREMENT), |
116 | [](rpc_port_t port, void *data) { |
117 | rpc_recv_and_send( |
118 | port, |
119 | [](rpc_buffer_t *buffer, void *data) { |
120 | reinterpret_cast<uint64_t *>(buffer->data)[0] += 1; |
121 | }, |
122 | data); |
123 | }, |
124 | nullptr); |
125 | |
126 | // Register the interface test callbacks. |
127 | rpc_register_callback( |
128 | device, static_cast<rpc_opcode_t>(RPC_TEST_INTERFACE), |
129 | [](rpc_port_t port, void *data) { |
130 | uint64_t cnt = 0; |
131 | bool end_with_recv; |
132 | rpc_recv( |
133 | port, |
134 | [](rpc_buffer_t *buffer, void *data) { |
135 | *reinterpret_cast<bool *>(data) = buffer->data[0]; |
136 | }, |
137 | &end_with_recv); |
138 | rpc_recv( |
139 | port, |
140 | [](rpc_buffer_t *buffer, void *data) { |
141 | *reinterpret_cast<uint64_t *>(data) = buffer->data[0]; |
142 | }, |
143 | &cnt); |
144 | rpc_send( |
145 | port, |
146 | [](rpc_buffer_t *buffer, void *data) { |
147 | uint64_t &cnt = *reinterpret_cast<uint64_t *>(data); |
148 | buffer->data[0] = cnt = cnt + 1; |
149 | }, |
150 | &cnt); |
151 | rpc_recv( |
152 | port, |
153 | [](rpc_buffer_t *buffer, void *data) { |
154 | *reinterpret_cast<uint64_t *>(data) = buffer->data[0]; |
155 | }, |
156 | &cnt); |
157 | rpc_send( |
158 | port, |
159 | [](rpc_buffer_t *buffer, void *data) { |
160 | uint64_t &cnt = *reinterpret_cast<uint64_t *>(data); |
161 | buffer->data[0] = cnt = cnt + 1; |
162 | }, |
163 | &cnt); |
164 | rpc_recv( |
165 | port, |
166 | [](rpc_buffer_t *buffer, void *data) { |
167 | *reinterpret_cast<uint64_t *>(data) = buffer->data[0]; |
168 | }, |
169 | &cnt); |
170 | rpc_recv( |
171 | port, |
172 | [](rpc_buffer_t *buffer, void *data) { |
173 | *reinterpret_cast<uint64_t *>(data) = buffer->data[0]; |
174 | }, |
175 | &cnt); |
176 | rpc_send( |
177 | port, |
178 | [](rpc_buffer_t *buffer, void *data) { |
179 | uint64_t &cnt = *reinterpret_cast<uint64_t *>(data); |
180 | buffer->data[0] = cnt = cnt + 1; |
181 | }, |
182 | &cnt); |
183 | rpc_send( |
184 | port, |
185 | [](rpc_buffer_t *buffer, void *data) { |
186 | uint64_t &cnt = *reinterpret_cast<uint64_t *>(data); |
187 | buffer->data[0] = cnt = cnt + 1; |
188 | }, |
189 | &cnt); |
190 | if (end_with_recv) |
191 | rpc_recv( |
192 | port, |
193 | [](rpc_buffer_t *buffer, void *data) { |
194 | *reinterpret_cast<uint64_t *>(data) = buffer->data[0]; |
195 | }, |
196 | &cnt); |
197 | else |
198 | rpc_send( |
199 | port, |
200 | [](rpc_buffer_t *buffer, void *data) { |
201 | uint64_t &cnt = *reinterpret_cast<uint64_t *>(data); |
202 | buffer->data[0] = cnt = cnt + 1; |
203 | }, |
204 | &cnt); |
205 | }, |
206 | nullptr); |
207 | |
208 | // Register the stream test handler. |
209 | rpc_register_callback( |
210 | device, static_cast<rpc_opcode_t>(RPC_TEST_STREAM), |
211 | [](rpc_port_t port, void *data) { |
212 | uint64_t sizes[lane_size] = {0}; |
213 | void *dst[lane_size] = {nullptr}; |
214 | rpc_recv_n( |
215 | port, dst, sizes, |
216 | [](uint64_t size, void *) -> void * { return new char[size]; }, |
217 | nullptr); |
218 | rpc_send_n(port, dst, sizes); |
219 | for (uint64_t i = 0; i < lane_size; ++i) { |
220 | if (dst[i]) |
221 | delete[] reinterpret_cast<uint8_t *>(dst[i]); |
222 | } |
223 | }, |
224 | nullptr); |
225 | } |
226 | |
227 | #endif |
228 | |