// RUN: %libomptarget-compilexx-run-and-check-generic
//
// REQUIRES: gpu
4 | |
5 | #include <assert.h> |
6 | #include <ompx.h> |
7 | #include <stdint.h> |
8 | #include <stdio.h> |
9 | #include <stdlib.h> |
10 | |
// AMD GCN variant of get_warp_size(): selected when the device architecture
// matches `amdgcn`. Returns the value of the compiler builtin
// __builtin_amdgcn_wavefrontsize().
#pragma omp begin declare variant match(device = {arch(amdgcn)})
unsigned get_warp_size() {
  return __builtin_amdgcn_wavefrontsize();
}
#pragma omp end declare variant
14 | |
// NVPTX variant of get_warp_size(): selected when the device architecture
// matches `nvptx64`. Returns the value of the compiler builtin
// __nvvm_read_ptx_sreg_warpsize().
#pragma omp begin declare variant match(device = {arch(nvptx64)})
unsigned get_warp_size() {
  return __nvvm_read_ptx_sreg_warpsize();
}
#pragma omp end declare variant
18 | |
// CPU variant of get_warp_size(): selected when the device kind is `cpu`.
// Always reports a warp size of 1.
#pragma omp begin declare variant match(device = {kind(cpu)})
unsigned get_warp_size() {
  return 1;
}
#pragma omp end declare variant
22 | |
// Test driver for ompx_ballot_sync() inside a bare target region.
//
// Launches one team of 256 threads. Each thread passes the low bit of its
// thread id as the ballot predicate, so the expected ballot result has every
// odd bit set (0xaaaa...), 64 bits wide when get_warp_size() reports 64 and
// 32 bits wide otherwise. Each thread stores 1 in res[tid] if the mask it
// observed matches that pattern, 0 otherwise.
//
// Returns 0 and prints "PASS" when every thread saw the expected mask;
// returns EXIT_FAILURE (without printing "PASS") otherwise.
int main(int argc, char *argv[]) {
  (void)argc;
  (void)argv;

  const int num_blocks = 1;
  const int block_size = 256;
  const int N = num_blocks * block_size;

  // Zero-initialised so that a region which never writes back its results is
  // reported as a failure instead of reading indeterminate memory. The cast
  // is kept because the RUN line compiles this file as C++.
  int *res = (int *)calloc((size_t)N, sizeof *res);
  if (!res) {
    fprintf(stderr, "FAIL: could not allocate %d result slots\n", N);
    return EXIT_FAILURE;
  }

#pragma omp target teams ompx_bare num_teams(num_blocks)                       \
    thread_limit(block_size) map(from : res[0 : N])
  {
    const int tid = ompx_thread_id_x();
    // ~UINT64_C(0) is a full 64-bit all-ones mask regardless of the width of
    // `unsigned long` on the host/target ABI.
    const uint64_t mask = ompx_ballot_sync(~UINT64_C(0), tid & 0x1);
    const uint64_t expected = (get_warp_size() == 64)
                                  ? UINT64_C(0xaaaaaaaaaaaaaaaa)
                                  : UINT64_C(0xaaaaaaaa);
    res[tid] = (mask == expected);
  }

  // Explicit check instead of assert(): assert() is compiled out under
  // NDEBUG, which would let the test print PASS without verifying anything.
  int first_bad = -1;
  for (int i = 0; i < N; ++i) {
    if (!res[i]) {
      first_bad = i;
      break;
    }
  }

  free(res);

  if (first_bad >= 0) {
    fprintf(stderr, "FAIL: unexpected ballot mask observed by thread %d\n",
            first_bad);
    return EXIT_FAILURE;
  }

  // CHECK: PASS
  printf("PASS\n");

  return 0;
}
50 |