1 | //===-- Test for the parallel scan and reduction operations on the GPU ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "src/__support/CPP/bit.h" |
10 | #include "src/__support/GPU/utils.h" |
11 | #include "test/IntegrationTest/test.h" |
12 | |
13 | using namespace LIBC_NAMESPACE; |
14 | |
// Returns the sum of the integers 0 through n, i.e. n * (n + 1) / 2.
//
// The product is evaluated in 64 bits so that it cannot wrap around before the
// division; a 32-bit product would already overflow for n >= 65536 even though
// the final sum still fits in 32 bits. The result is converted back to
// uint32_t on return.
static uint32_t sum(uint32_t n) {
  const uint64_t wide = n;
  return static_cast<uint32_t>(wide * (wide + 1) / 2);
}
16 | |
17 | // Tests a reduction within a convergant warp or wavefront using some known |
18 | // values. For example, if every element in the lane is one, then the sum should |
19 | // be the size of the warp or wavefront, i.e. 1 + 1 + 1 ... + 1. |
20 | static void test_reduce() { |
21 | uint64_t mask = gpu::get_lane_mask(); |
22 | uint32_t x = gpu::reduce(lane_mask: mask, x: 1); |
23 | EXPECT_EQ(x, gpu::get_lane_size()); |
24 | |
25 | uint32_t y = gpu::reduce(lane_mask: mask, x: gpu::get_lane_id()); |
26 | EXPECT_EQ(y, sum(gpu::get_lane_size() - 1)); |
27 | |
28 | uint32_t z = 0; |
29 | if (gpu::get_lane_id() % 2) |
30 | z = gpu::reduce(lane_mask: gpu::get_lane_mask(), x: 1); |
31 | gpu::sync_lane(mask); |
32 | |
33 | EXPECT_EQ(z, gpu::get_lane_id() % 2 ? gpu::get_lane_size() / 2 : 0); |
34 | } |
35 | |
36 | // Tests an accumulation scan within a convergent warp or wavefront using some |
37 | // known values. For example, if every element in the lane is one, then the scan |
38 | // should have each element be equivalent to its ID, i.e. 1, 1 + 1, ... |
39 | static void test_scan() { |
40 | uint64_t mask = gpu::get_lane_mask(); |
41 | |
42 | uint32_t x = gpu::scan(lane_mask: mask, x: 1); |
43 | EXPECT_EQ(x, gpu::get_lane_id() + 1); |
44 | |
45 | uint32_t y = gpu::scan(lane_mask: mask, x: gpu::get_lane_id()); |
46 | EXPECT_EQ(y, sum(gpu::get_lane_id())); |
47 | |
48 | uint32_t z = 0; |
49 | if (gpu::get_lane_id() % 2) |
50 | z = gpu::scan(lane_mask: gpu::get_lane_mask(), x: 1); |
51 | gpu::sync_lane(mask); |
52 | |
53 | EXPECT_EQ(z, gpu::get_lane_id() % 2 ? gpu::get_lane_id() / 2 + 1 : 0); |
54 | } |
55 | |
56 | TEST_MAIN(int argc, char **argv, char **envp) { |
57 | test_reduce(); |
58 | |
59 | test_scan(); |
60 | |
61 | return 0; |
62 | } |
63 | |