| 1 | //===------- Offload API tests - olLaunchKernel --------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "../common/Fixtures.hpp" |
| 10 | #include <OffloadAPI.h> |
| 11 | #include <gtest/gtest.h> |
| 12 | |
| 13 | struct LaunchKernelTestBase : OffloadQueueTest { |
| 14 | void SetUpProgram(const char *program) { |
| 15 | RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp()); |
| 16 | ASSERT_TRUE(TestEnvironment::loadDeviceBinary(program, Device, DeviceBin)); |
| 17 | ASSERT_GE(DeviceBin->getBufferSize(), 0lu); |
| 18 | ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(), |
| 19 | DeviceBin->getBufferSize(), &Program)); |
| 20 | |
| 21 | LaunchArgs.Dimensions = 1; |
| 22 | LaunchArgs.GroupSize = {64, 1, 1}; |
| 23 | LaunchArgs.NumGroups = {1, 1, 1}; |
| 24 | |
| 25 | LaunchArgs.DynSharedMemory = 0; |
| 26 | } |
| 27 | |
| 28 | void TearDown() override { |
| 29 | if (Program) { |
| 30 | olDestroyProgram(Program); |
| 31 | } |
| 32 | RETURN_ON_FATAL_FAILURE(OffloadQueueTest::TearDown()); |
| 33 | } |
| 34 | |
| 35 | std::unique_ptr<llvm::MemoryBuffer> DeviceBin; |
| 36 | ol_program_handle_t Program = nullptr; |
| 37 | ol_kernel_launch_size_args_t LaunchArgs{}; |
| 38 | }; |
| 39 | |
// Fixture for tests that launch a single kernel; the kernel symbol shares
// its name with the program binary it is loaded from.
struct LaunchSingleKernelTestBase : LaunchKernelTestBase {
  void SetUpKernel(const char *kernel) {
    RETURN_ON_FATAL_FAILURE(SetUpProgram(kernel));
    // Kernel symbol is looked up by the same name as the program.
    ASSERT_SUCCESS(
        olGetSymbol(Program, kernel, OL_SYMBOL_KIND_KERNEL, &Kernel));
  }

  ol_symbol_handle_t Kernel = nullptr;
};
| 49 | |
// Defines a device-fixture test class olLaunchKernel<NAME>Test that loads the
// single-kernel program <KERNEL>, and instantiates it for every device.
// Fix: the overridden method was missing its name (`void () override`), which
// does not compile — it must be gtest's `SetUp`.
#define KERNEL_TEST(NAME, KERNEL)                                              \
  struct olLaunchKernel##NAME##Test : LaunchSingleKernelTestBase {             \
    void SetUp() override { SetUpKernel(#KERNEL); }                            \
  };                                                                           \
  OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernel##NAME##Test);
| 55 | |
// One fixture per single-kernel test program used below.
KERNEL_TEST(Foo, foo)
KERNEL_TEST(NoArgs, noargs)
KERNEL_TEST(LocalMem, localmem)
KERNEL_TEST(LocalMemReduction, localmem_reduction)
KERNEL_TEST(LocalMemStatic, localmem_static)
KERNEL_TEST(GlobalCtor, global_ctor)
KERNEL_TEST(GlobalDtor, global_dtor)
| 63 | |
| 64 | struct LaunchMultipleKernelTestBase : LaunchKernelTestBase { |
| 65 | void SetUpKernels(const char *program, std::vector<const char *> kernels) { |
| 66 | RETURN_ON_FATAL_FAILURE(SetUpProgram(program)); |
| 67 | |
| 68 | Kernels.resize(kernels.size()); |
| 69 | size_t I = 0; |
| 70 | for (auto K : kernels) |
| 71 | ASSERT_SUCCESS( |
| 72 | olGetSymbol(Program, K, OL_SYMBOL_KIND_KERNEL, &Kernels[I++])); |
| 73 | } |
| 74 | |
| 75 | std::vector<ol_symbol_handle_t> Kernels; |
| 76 | }; |
| 77 | |
| 78 | #define KERNEL_MULTI_TEST(NAME, PROGRAM, ...) \ |
| 79 | struct olLaunchKernel##NAME##Test : LaunchMultipleKernelTestBase { \ |
| 80 | void SetUp() override { SetUpKernels(#PROGRAM, {__VA_ARGS__}); } \ |
| 81 | }; \ |
| 82 | OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernel##NAME##Test); |
| 83 | |
| 84 | KERNEL_MULTI_TEST(Global, global, "write" , "read" ) |
| 85 | |
| 86 | (olLaunchKernelFooTest, Success) { |
| 87 | void *Mem; |
| 88 | ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, |
| 89 | LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem)); |
| 90 | struct { |
| 91 | void *Mem; |
| 92 | } Args{.Mem: Mem}; |
| 93 | |
| 94 | ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), |
| 95 | &LaunchArgs, nullptr)); |
| 96 | |
| 97 | ASSERT_SUCCESS(olWaitQueue(Queue)); |
| 98 | |
| 99 | uint32_t *Data = (uint32_t *)Mem; |
| 100 | for (uint32_t i = 0; i < 64; i++) { |
| 101 | ASSERT_EQ(Data[i], i); |
| 102 | } |
| 103 | |
| 104 | ASSERT_SUCCESS(olMemFree(Mem)); |
| 105 | } |
| 106 | |
// A kernel with no arguments is launched with a null argument pointer and a
// zero argument size.
TEST_P(olLaunchKernelNoArgsTest, Success) {
  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs, nullptr));

  ASSERT_SUCCESS(olWaitQueue(Queue));
}
| 113 | |
| 114 | (olLaunchKernelFooTest, SuccessSynchronous) { |
| 115 | void *Mem; |
| 116 | ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, |
| 117 | LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem)); |
| 118 | |
| 119 | struct { |
| 120 | void *Mem; |
| 121 | } Args{.Mem: Mem}; |
| 122 | |
| 123 | ASSERT_SUCCESS(olLaunchKernel(nullptr, Device, Kernel, &Args, sizeof(Args), |
| 124 | &LaunchArgs, nullptr)); |
| 125 | |
| 126 | uint32_t *Data = (uint32_t *)Mem; |
| 127 | for (uint32_t i = 0; i < 64; i++) { |
| 128 | ASSERT_EQ(Data[i], i); |
| 129 | } |
| 130 | |
| 131 | ASSERT_SUCCESS(olMemFree(Mem)); |
| 132 | } |
| 133 | |
| 134 | TEST_P(olLaunchKernelLocalMemTest, Success) { |
| 135 | LaunchArgs.NumGroups.x = 4; |
| 136 | LaunchArgs.DynSharedMemory = 64 * sizeof(uint32_t); |
| 137 | |
| 138 | void *Mem; |
| 139 | ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, |
| 140 | LaunchArgs.GroupSize.x * LaunchArgs.NumGroups.x * |
| 141 | sizeof(uint32_t), |
| 142 | &Mem)); |
| 143 | struct { |
| 144 | void *Mem; |
| 145 | } Args{.Mem: Mem}; |
| 146 | |
| 147 | ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), |
| 148 | &LaunchArgs, nullptr)); |
| 149 | |
| 150 | ASSERT_SUCCESS(olWaitQueue(Queue)); |
| 151 | |
| 152 | uint32_t *Data = (uint32_t *)Mem; |
| 153 | for (uint32_t i = 0; i < LaunchArgs.GroupSize.x * LaunchArgs.NumGroups.x; i++) |
| 154 | ASSERT_EQ(Data[i], (i % 64) * 2); |
| 155 | |
| 156 | ASSERT_SUCCESS(olMemFree(Mem)); |
| 157 | } |
| 158 | |
| 159 | TEST_P(olLaunchKernelLocalMemReductionTest, Success) { |
| 160 | LaunchArgs.NumGroups.x = 4; |
| 161 | LaunchArgs.DynSharedMemory = 64 * sizeof(uint32_t); |
| 162 | |
| 163 | void *Mem; |
| 164 | ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, |
| 165 | LaunchArgs.NumGroups.x * sizeof(uint32_t), &Mem)); |
| 166 | struct { |
| 167 | void *Mem; |
| 168 | } Args{.Mem: Mem}; |
| 169 | |
| 170 | ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), |
| 171 | &LaunchArgs, nullptr)); |
| 172 | |
| 173 | ASSERT_SUCCESS(olWaitQueue(Queue)); |
| 174 | |
| 175 | uint32_t *Data = (uint32_t *)Mem; |
| 176 | for (uint32_t i = 0; i < LaunchArgs.NumGroups.x; i++) |
| 177 | ASSERT_EQ(Data[i], 2 * LaunchArgs.GroupSize.x); |
| 178 | |
| 179 | ASSERT_SUCCESS(olMemFree(Mem)); |
| 180 | } |
| 181 | |
| 182 | TEST_P(olLaunchKernelLocalMemStaticTest, Success) { |
| 183 | LaunchArgs.NumGroups.x = 4; |
| 184 | LaunchArgs.DynSharedMemory = 0; |
| 185 | |
| 186 | void *Mem; |
| 187 | ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, |
| 188 | LaunchArgs.NumGroups.x * sizeof(uint32_t), &Mem)); |
| 189 | struct { |
| 190 | void *Mem; |
| 191 | } Args{.Mem: Mem}; |
| 192 | |
| 193 | ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), |
| 194 | &LaunchArgs, nullptr)); |
| 195 | |
| 196 | ASSERT_SUCCESS(olWaitQueue(Queue)); |
| 197 | |
| 198 | uint32_t *Data = (uint32_t *)Mem; |
| 199 | for (uint32_t i = 0; i < LaunchArgs.NumGroups.x; i++) |
| 200 | ASSERT_EQ(Data[i], 2 * LaunchArgs.GroupSize.x); |
| 201 | |
| 202 | ASSERT_SUCCESS(olMemFree(Mem)); |
| 203 | } |
| 204 | |
| 205 | TEST_P(olLaunchKernelGlobalTest, Success) { |
| 206 | void *Mem; |
| 207 | ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, |
| 208 | LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem)); |
| 209 | struct { |
| 210 | void *Mem; |
| 211 | } Args{.Mem: Mem}; |
| 212 | |
| 213 | ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernels[0], nullptr, 0, |
| 214 | &LaunchArgs, nullptr)); |
| 215 | ASSERT_SUCCESS(olWaitQueue(Queue)); |
| 216 | ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernels[1], &Args, sizeof(Args), |
| 217 | &LaunchArgs, nullptr)); |
| 218 | ASSERT_SUCCESS(olWaitQueue(Queue)); |
| 219 | |
| 220 | uint32_t *Data = (uint32_t *)Mem; |
| 221 | for (uint32_t i = 0; i < 64; i++) { |
| 222 | ASSERT_EQ(Data[i], i * 2); |
| 223 | } |
| 224 | |
| 225 | ASSERT_SUCCESS(olMemFree(Mem)); |
| 226 | } |
| 227 | |
| 228 | TEST_P(olLaunchKernelGlobalTest, InvalidNotAKernel) { |
| 229 | ol_symbol_handle_t Global = nullptr; |
| 230 | ASSERT_SUCCESS( |
| 231 | olGetSymbol(Program, "global" , OL_SYMBOL_KIND_GLOBAL_VARIABLE, &Global)); |
| 232 | ASSERT_ERROR( |
| 233 | OL_ERRC_SYMBOL_KIND, |
| 234 | olLaunchKernel(Queue, Device, Global, nullptr, 0, &LaunchArgs, nullptr)); |
| 235 | } |
| 236 | |
| 237 | TEST_P(olLaunchKernelGlobalCtorTest, Success) { |
| 238 | void *Mem; |
| 239 | ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, |
| 240 | LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem)); |
| 241 | struct { |
| 242 | void *Mem; |
| 243 | } Args{.Mem: Mem}; |
| 244 | |
| 245 | ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), |
| 246 | &LaunchArgs, nullptr)); |
| 247 | ASSERT_SUCCESS(olWaitQueue(Queue)); |
| 248 | |
| 249 | uint32_t *Data = (uint32_t *)Mem; |
| 250 | for (uint32_t i = 0; i < 64; i++) { |
| 251 | ASSERT_EQ(Data[i], i + 100); |
| 252 | } |
| 253 | |
| 254 | ASSERT_SUCCESS(olMemFree(Mem)); |
| 255 | } |
| 256 | |
// Smoke test for a program with a global destructor: launch and wait only.
TEST_P(olLaunchKernelGlobalDtorTest, Success) {
  // TODO: We can't inspect the result of a destructor yet, once we
  // find/implement a way, update this test. For now we just check that nothing
  // crashes
  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs, nullptr));
  ASSERT_SUCCESS(olWaitQueue(Queue));
}
| 265 | |