1//===------- Offload API tests - olLaunchKernel --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "../common/Fixtures.hpp"
10#include <OffloadAPI.h>
11#include <gtest/gtest.h>
12
13struct LaunchKernelTestBase : OffloadQueueTest {
14 void SetUpProgram(const char *program) {
15 RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());
16 ASSERT_TRUE(TestEnvironment::loadDeviceBinary(program, Device, DeviceBin));
17 ASSERT_GE(DeviceBin->getBufferSize(), 0lu);
18 ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
19 DeviceBin->getBufferSize(), &Program));
20
21 LaunchArgs.Dimensions = 1;
22 LaunchArgs.GroupSize = {64, 1, 1};
23 LaunchArgs.NumGroups = {1, 1, 1};
24
25 LaunchArgs.DynSharedMemory = 0;
26 }
27
28 void TearDown() override {
29 if (Program) {
30 olDestroyProgram(Program);
31 }
32 RETURN_ON_FATAL_FAILURE(OffloadQueueTest::TearDown());
33 }
34
35 std::unique_ptr<llvm::MemoryBuffer> DeviceBin;
36 ol_program_handle_t Program = nullptr;
37 ol_kernel_launch_size_args_t LaunchArgs{};
38};
39
40struct LaunchSingleKernelTestBase : LaunchKernelTestBase {
41 void SetUpKernel(const char *kernel) {
42 RETURN_ON_FATAL_FAILURE(SetUpProgram(kernel));
43 ASSERT_SUCCESS(
44 olGetSymbol(Program, kernel, OL_SYMBOL_KIND_KERNEL, &Kernel));
45 }
46
47 ol_symbol_handle_t Kernel = nullptr;
48};
49
// Declares and instantiates a device-parameterized fixture named
// olLaunchKernel<NAME>Test that loads the single-kernel program <KERNEL>.
// (Comments cannot go inside the macro body: a `//` comment would swallow
// the line-continuation backslash.)
#define KERNEL_TEST(NAME, KERNEL)                                              \
  struct olLaunchKernel##NAME##Test : LaunchSingleKernelTestBase {             \
    void SetUp() override { SetUpKernel(#KERNEL); }                            \
  };                                                                           \
  OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernel##NAME##Test);
55
// One fixture per single-kernel test program shipped with the test suite.
KERNEL_TEST(Foo, foo)
KERNEL_TEST(NoArgs, noargs)
KERNEL_TEST(LocalMem, localmem)
KERNEL_TEST(LocalMemReduction, localmem_reduction)
KERNEL_TEST(LocalMemStatic, localmem_static)
KERNEL_TEST(GlobalCtor, global_ctor)
KERNEL_TEST(GlobalDtor, global_dtor)
63
64struct LaunchMultipleKernelTestBase : LaunchKernelTestBase {
65 void SetUpKernels(const char *program, std::vector<const char *> kernels) {
66 RETURN_ON_FATAL_FAILURE(SetUpProgram(program));
67
68 Kernels.resize(kernels.size());
69 size_t I = 0;
70 for (auto K : kernels)
71 ASSERT_SUCCESS(
72 olGetSymbol(Program, K, OL_SYMBOL_KIND_KERNEL, &Kernels[I++]));
73 }
74
75 std::vector<ol_symbol_handle_t> Kernels;
76};
77
// Declares and instantiates a device-parameterized fixture named
// olLaunchKernel<NAME>Test that loads <PROGRAM> and resolves the listed
// kernel names (variadic arguments) from it.
#define KERNEL_MULTI_TEST(NAME, PROGRAM, ...)                                  \
  struct olLaunchKernel##NAME##Test : LaunchMultipleKernelTestBase {           \
    void SetUp() override { SetUpKernels(#PROGRAM, {__VA_ARGS__}); }           \
  };                                                                           \
  OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernel##NAME##Test);
83
// The "global" program provides a "write" kernel and a "read" kernel that
// communicate through a device global variable (Kernels[0] / Kernels[1]).
KERNEL_MULTI_TEST(Global, global, "write", "read")
85
86TEST_P(olLaunchKernelFooTest, Success) {
87 void *Mem;
88 ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
89 LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
90 struct {
91 void *Mem;
92 } Args{.Mem: Mem};
93
94 ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args),
95 &LaunchArgs, nullptr));
96
97 ASSERT_SUCCESS(olWaitQueue(Queue));
98
99 uint32_t *Data = (uint32_t *)Mem;
100 for (uint32_t i = 0; i < 64; i++) {
101 ASSERT_EQ(Data[i], i);
102 }
103
104 ASSERT_SUCCESS(olMemFree(Mem));
105}
106
107TEST_P(olLaunchKernelNoArgsTest, Success) {
108 ASSERT_SUCCESS(
109 olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs, nullptr));
110
111 ASSERT_SUCCESS(olWaitQueue(Queue));
112}
113
114TEST_P(olLaunchKernelFooTest, SuccessSynchronous) {
115 void *Mem;
116 ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
117 LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
118
119 struct {
120 void *Mem;
121 } Args{.Mem: Mem};
122
123 ASSERT_SUCCESS(olLaunchKernel(nullptr, Device, Kernel, &Args, sizeof(Args),
124 &LaunchArgs, nullptr));
125
126 uint32_t *Data = (uint32_t *)Mem;
127 for (uint32_t i = 0; i < 64; i++) {
128 ASSERT_EQ(Data[i], i);
129 }
130
131 ASSERT_SUCCESS(olMemFree(Mem));
132}
133
134TEST_P(olLaunchKernelLocalMemTest, Success) {
135 LaunchArgs.NumGroups.x = 4;
136 LaunchArgs.DynSharedMemory = 64 * sizeof(uint32_t);
137
138 void *Mem;
139 ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
140 LaunchArgs.GroupSize.x * LaunchArgs.NumGroups.x *
141 sizeof(uint32_t),
142 &Mem));
143 struct {
144 void *Mem;
145 } Args{.Mem: Mem};
146
147 ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args),
148 &LaunchArgs, nullptr));
149
150 ASSERT_SUCCESS(olWaitQueue(Queue));
151
152 uint32_t *Data = (uint32_t *)Mem;
153 for (uint32_t i = 0; i < LaunchArgs.GroupSize.x * LaunchArgs.NumGroups.x; i++)
154 ASSERT_EQ(Data[i], (i % 64) * 2);
155
156 ASSERT_SUCCESS(olMemFree(Mem));
157}
158
159TEST_P(olLaunchKernelLocalMemReductionTest, Success) {
160 LaunchArgs.NumGroups.x = 4;
161 LaunchArgs.DynSharedMemory = 64 * sizeof(uint32_t);
162
163 void *Mem;
164 ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
165 LaunchArgs.NumGroups.x * sizeof(uint32_t), &Mem));
166 struct {
167 void *Mem;
168 } Args{.Mem: Mem};
169
170 ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args),
171 &LaunchArgs, nullptr));
172
173 ASSERT_SUCCESS(olWaitQueue(Queue));
174
175 uint32_t *Data = (uint32_t *)Mem;
176 for (uint32_t i = 0; i < LaunchArgs.NumGroups.x; i++)
177 ASSERT_EQ(Data[i], 2 * LaunchArgs.GroupSize.x);
178
179 ASSERT_SUCCESS(olMemFree(Mem));
180}
181
182TEST_P(olLaunchKernelLocalMemStaticTest, Success) {
183 LaunchArgs.NumGroups.x = 4;
184 LaunchArgs.DynSharedMemory = 0;
185
186 void *Mem;
187 ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
188 LaunchArgs.NumGroups.x * sizeof(uint32_t), &Mem));
189 struct {
190 void *Mem;
191 } Args{.Mem: Mem};
192
193 ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args),
194 &LaunchArgs, nullptr));
195
196 ASSERT_SUCCESS(olWaitQueue(Queue));
197
198 uint32_t *Data = (uint32_t *)Mem;
199 for (uint32_t i = 0; i < LaunchArgs.NumGroups.x; i++)
200 ASSERT_EQ(Data[i], 2 * LaunchArgs.GroupSize.x);
201
202 ASSERT_SUCCESS(olMemFree(Mem));
203}
204
205TEST_P(olLaunchKernelGlobalTest, Success) {
206 void *Mem;
207 ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
208 LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
209 struct {
210 void *Mem;
211 } Args{.Mem: Mem};
212
213 ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernels[0], nullptr, 0,
214 &LaunchArgs, nullptr));
215 ASSERT_SUCCESS(olWaitQueue(Queue));
216 ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernels[1], &Args, sizeof(Args),
217 &LaunchArgs, nullptr));
218 ASSERT_SUCCESS(olWaitQueue(Queue));
219
220 uint32_t *Data = (uint32_t *)Mem;
221 for (uint32_t i = 0; i < 64; i++) {
222 ASSERT_EQ(Data[i], i * 2);
223 }
224
225 ASSERT_SUCCESS(olMemFree(Mem));
226}
227
228TEST_P(olLaunchKernelGlobalTest, InvalidNotAKernel) {
229 ol_symbol_handle_t Global = nullptr;
230 ASSERT_SUCCESS(
231 olGetSymbol(Program, "global", OL_SYMBOL_KIND_GLOBAL_VARIABLE, &Global));
232 ASSERT_ERROR(
233 OL_ERRC_SYMBOL_KIND,
234 olLaunchKernel(Queue, Device, Global, nullptr, 0, &LaunchArgs, nullptr));
235}
236
237TEST_P(olLaunchKernelGlobalCtorTest, Success) {
238 void *Mem;
239 ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
240 LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
241 struct {
242 void *Mem;
243 } Args{.Mem: Mem};
244
245 ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args),
246 &LaunchArgs, nullptr));
247 ASSERT_SUCCESS(olWaitQueue(Queue));
248
249 uint32_t *Data = (uint32_t *)Mem;
250 for (uint32_t i = 0; i < 64; i++) {
251 ASSERT_EQ(Data[i], i + 100);
252 }
253
254 ASSERT_SUCCESS(olMemFree(Mem));
255}
256
257TEST_P(olLaunchKernelGlobalDtorTest, Success) {
258 // TODO: We can't inspect the result of a destructor yet, once we
259 // find/implement a way, update this test. For now we just check that nothing
260 // crashes
261 ASSERT_SUCCESS(
262 olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs, nullptr));
263 ASSERT_SUCCESS(olWaitQueue(Queue));
264}
265

source code of offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp