// RUN: %libomptarget-compilexx-run-and-check-generic
//
// REQUIRES: gpu

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <ompx.h>
#include <type_traits>
#include <vector>

13#pragma omp begin declare variant match(device = {arch(amdgcn)})
14unsigned get_warp_size() { return __builtin_amdgcn_wavefrontsize(); }
15#pragma omp end declare variant
16
17#pragma omp begin declare variant match(device = {arch(nvptx64)})
18unsigned get_warp_size() { return __nvvm_read_ptx_sreg_warpsize(); }
19#pragma omp end declare variant
20
// Variant selected for CPU devices: every thread is treated as its own
// one-lane warp, so the reported warp size is 1.
#pragma omp begin declare variant match(device = {kind(cpu)})
unsigned get_warp_size() { return 1; }
#pragma omp end declare variant

/// Compares two integral values for exact equality.
///
/// This overload participates in overload resolution only when \p T is an
/// integral type.
///
/// \param LHS first value.
/// \param RHS second value.
/// \returns true when \p LHS and \p RHS are equal.
template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
bool equal(T LHS, T RHS) {
  return LHS == RHS;
}

/// Compares two floating-point values with an absolute tolerance.
///
/// This overload participates in overload resolution only when \p T is a
/// floating-point type.
///
/// \param LHS first value.
/// \param RHS second value.
/// \returns true when |LHS - RHS| is strictly smaller than
///          std::numeric_limits<T>::epsilon().
template <typename T,
          std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
bool equal(T LHS, T RHS) {
  return __builtin_fabs(LHS - RHS) < std::numeric_limits<T>::epsilon();
}

36template <typename T> void test() {
37 constexpr const int num_blocks = 1;
38 constexpr const int block_size = 256;
39 constexpr const int N = num_blocks * block_size;
40 int *res = new int[N];
41
42#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) \
43 map(from: res[0:N])
44 {
45 int tid = ompx_thread_id_x();
46 T val = ompx::shfl_down_sync(~0U, static_cast<T>(tid), 1);
47 int warp_size = get_warp_size();
48 if ((tid & (warp_size - 1)) != warp_size - 1)
49 res[tid] = equal(val, static_cast<T>(tid + 1));
50 else
51 res[tid] = equal(val, static_cast<T>(tid));
52 }
53
54 for (int i = 0; i < N; ++i)
55 assert(res[i]);
56
57 delete[] res;
58}
59
60int main(int argc, char *argv[]) {
61 test<int32_t>();
62 test<int64_t>();
63 test<float>();
64 test<double>();
65 // CHECK: PASS
66 printf(format: "PASS\n");
67
68 return 0;
69}
70

// Source: offload/test/offloading/ompx_bare_shfl_down_sync.cpp
// (listing captured from KDAB's online code browser; the site's navigation
// text that followed the listing is not part of the test).