1//===-- Generic device loader interface -----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H
10#define LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H
11
12#include "utils/gpu/server/llvmlibc_rpc_server.h"
13
14#include "llvm-libc-types/rpc_opcodes_t.h"
15#include "include/llvm-libc-types/test_rpc_opcodes_t.h"
16
17#include <cstddef>
18#include <cstdint>
19#include <cstdio>
20#include <cstdlib>
21#include <cstring>
22
/// Generic launch parameters for configuring the number of blocks / threads
/// used when a kernel is launched. Each count is given per dimension.
struct LaunchParameters {
  // Thread counts in the x, y and z dimensions.
  uint32_t num_threads_x;
  uint32_t num_threads_y;
  uint32_t num_threads_z;
  // Block counts in the x, y and z dimensions.
  uint32_t num_blocks_x;
  uint32_t num_blocks_y;
  uint32_t num_blocks_z;
};
32
/// Argument block handed to the '_begin' kernel.
struct begin_args_t {
  int argc;   ///< Number of entries in the argument vector.
  void *argv; ///< Argument vector, carried as an untyped pointer.
  void *envp; ///< Environment vector, carried as an untyped pointer.
};
39
/// Argument block handed to the '_start' kernel.
struct start_args_t {
  int argc;   ///< Number of entries in the argument vector.
  void *argv; ///< Argument vector, carried as an untyped pointer.
  void *envp; ///< Environment vector, carried as an untyped pointer.
  /// NOTE(review): presumably the location the kernel stores its result in;
  /// confirm against the loader implementations.
  void *ret;
};
47
/// Argument block handed to the '_end' kernel. It carries a single integer.
struct end_args_t {
  /// NOTE(review): the field is named like an argument count, but what value
  /// callers actually store here is not visible in this header; confirm
  /// against the loader implementations.
  int argc;
};
52
53/// Generic interface to load the \p image and launch execution of the _start
54/// kernel on the target device. Copies \p argc and \p argv to the device.
55/// Returns the final value of the `main` function on the device.
56int load(int argc, char **argv, char **evnp, void *image, size_t size,
57 const LaunchParameters &params);
58
/// Round \p val up to the nearest multiple of \p align and return it.
/// \p align is converted to the value type \p V before any arithmetic is done.
template <typename V, typename A> inline V align_up(V val, A align) {
  const V step = V(align);
  // Pad so that the truncating division below rounds upwards.
  const auto padded = val + step - 1;
  return (padded / step) * step;
}
63
/// Copy the system's argument vector to GPU memory allocated using \p alloc.
///
/// \p alloc is called once with the total number of bytes required and must
/// return memory this function can write to directly, or null on failure. The
/// buffer is laid out as an array of \p argc + 1 pointers (the last one null)
/// followed immediately by the bytes of every string, each with its
/// terminating NUL. Every pointer in the array refers to its string inside the
/// same buffer.
///
/// \returns the buffer obtained from \p alloc, or nullptr if it returned null.
template <typename Allocator>
void *copy_argument_vector(int argc, char **argv, Allocator alloc) {
  const size_t argv_size = sizeof(char *) * (argc + 1);
  size_t str_size = 0;
  for (int i = 0; i < argc; ++i)
    str_size += std::strlen(argv[i]) + 1;

  // We allocate enough space for a null terminated array and all the strings.
  void *dev_argv = alloc(argv_size + str_size);
  if (!dev_argv)
    return nullptr;

  // Store the strings linearly in the same memory buffer, starting right
  // after the pointer array.
  void **dev_ptrs = static_cast<void **>(dev_argv);
  uint8_t *dev_str = static_cast<uint8_t *>(dev_argv) + argv_size;
  for (int i = 0; i < argc; ++i) {
    const size_t size = std::strlen(argv[i]) + 1;
    std::memcpy(dev_str, argv[i], size);
    dev_ptrs[i] = dev_str;
    dev_str += size;
  }

  // Ensure the vector is null terminated. The terminator belongs at element
  // index argc; argv_size is a byte count and indexing with it would write
  // far past the end of the allocation.
  dev_ptrs[argc] = nullptr;
  return dev_argv;
}
90
/// Copy the system's environment to GPU memory allocated using \p alloc.
/// The entries of the null-terminated \p envp array are counted and then
/// forwarded to copy_argument_vector, whose result is returned unchanged.
template <typename Allocator>
void *copy_environment(char **envp, Allocator alloc) {
  // Walk the array until the terminating null entry to learn its length.
  int envc = 0;
  while (envp[envc] != nullptr)
    ++envc;

  return copy_argument_vector(envc, envp, alloc);
}
100
/// Report a fatal error: print \p msg followed by a newline to stderr, then
/// terminate the process with EXIT_FAILURE. This function does not return.
inline void handle_error(const char *msg) {
  std::fprintf(stderr, "%s\n", msg);
  std::exit(EXIT_FAILURE);
}
105
106inline void handle_error(rpc_status_t) {
107 handle_error(msg: "Failure in the RPC server\n");
108}
109
110template <uint32_t lane_size>
111inline void register_rpc_callbacks(rpc_device_t device) {
112 static_assert(lane_size == 32 || lane_size == 64, "Invalid Lane size");
113 // Register the ping test for the `libc` tests.
114 rpc_register_callback(
115 device, static_cast<rpc_opcode_t>(RPC_TEST_INCREMENT),
116 [](rpc_port_t port, void *data) {
117 rpc_recv_and_send(
118 port,
119 [](rpc_buffer_t *buffer, void *data) {
120 reinterpret_cast<uint64_t *>(buffer->data)[0] += 1;
121 },
122 data);
123 },
124 nullptr);
125
126 // Register the interface test callbacks.
127 rpc_register_callback(
128 device, static_cast<rpc_opcode_t>(RPC_TEST_INTERFACE),
129 [](rpc_port_t port, void *data) {
130 uint64_t cnt = 0;
131 bool end_with_recv;
132 rpc_recv(
133 port,
134 [](rpc_buffer_t *buffer, void *data) {
135 *reinterpret_cast<bool *>(data) = buffer->data[0];
136 },
137 &end_with_recv);
138 rpc_recv(
139 port,
140 [](rpc_buffer_t *buffer, void *data) {
141 *reinterpret_cast<uint64_t *>(data) = buffer->data[0];
142 },
143 &cnt);
144 rpc_send(
145 port,
146 [](rpc_buffer_t *buffer, void *data) {
147 uint64_t &cnt = *reinterpret_cast<uint64_t *>(data);
148 buffer->data[0] = cnt = cnt + 1;
149 },
150 &cnt);
151 rpc_recv(
152 port,
153 [](rpc_buffer_t *buffer, void *data) {
154 *reinterpret_cast<uint64_t *>(data) = buffer->data[0];
155 },
156 &cnt);
157 rpc_send(
158 port,
159 [](rpc_buffer_t *buffer, void *data) {
160 uint64_t &cnt = *reinterpret_cast<uint64_t *>(data);
161 buffer->data[0] = cnt = cnt + 1;
162 },
163 &cnt);
164 rpc_recv(
165 port,
166 [](rpc_buffer_t *buffer, void *data) {
167 *reinterpret_cast<uint64_t *>(data) = buffer->data[0];
168 },
169 &cnt);
170 rpc_recv(
171 port,
172 [](rpc_buffer_t *buffer, void *data) {
173 *reinterpret_cast<uint64_t *>(data) = buffer->data[0];
174 },
175 &cnt);
176 rpc_send(
177 port,
178 [](rpc_buffer_t *buffer, void *data) {
179 uint64_t &cnt = *reinterpret_cast<uint64_t *>(data);
180 buffer->data[0] = cnt = cnt + 1;
181 },
182 &cnt);
183 rpc_send(
184 port,
185 [](rpc_buffer_t *buffer, void *data) {
186 uint64_t &cnt = *reinterpret_cast<uint64_t *>(data);
187 buffer->data[0] = cnt = cnt + 1;
188 },
189 &cnt);
190 if (end_with_recv)
191 rpc_recv(
192 port,
193 [](rpc_buffer_t *buffer, void *data) {
194 *reinterpret_cast<uint64_t *>(data) = buffer->data[0];
195 },
196 &cnt);
197 else
198 rpc_send(
199 port,
200 [](rpc_buffer_t *buffer, void *data) {
201 uint64_t &cnt = *reinterpret_cast<uint64_t *>(data);
202 buffer->data[0] = cnt = cnt + 1;
203 },
204 &cnt);
205 },
206 nullptr);
207
208 // Register the stream test handler.
209 rpc_register_callback(
210 device, static_cast<rpc_opcode_t>(RPC_TEST_STREAM),
211 [](rpc_port_t port, void *data) {
212 uint64_t sizes[lane_size] = {0};
213 void *dst[lane_size] = {nullptr};
214 rpc_recv_n(
215 port, dst, sizes,
216 [](uint64_t size, void *) -> void * { return new char[size]; },
217 nullptr);
218 rpc_send_n(port, dst, sizes);
219 for (uint64_t i = 0; i < lane_size; ++i) {
220 if (dst[i])
221 delete[] reinterpret_cast<uint8_t *>(dst[i]);
222 }
223 },
224 nullptr);
225}
226
227#endif
228

source code of libc/utils/gpu/loader/Loader.h