1 | // clang-format off |
2 | // RUN: %libomptarget-compile-generic |
3 | // RUN: env LIBOMPTARGET_INFO=16 \ |
4 | // RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefix=DEFAULT |
5 | |
6 | // UNSUPPORTED: nvptx64-nvidia-cuda |
7 | // UNSUPPORTED: nvptx64-nvidia-cuda-LTO |
8 | // UNSUPPORTED: aarch64-unknown-linux-gnu |
9 | // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO |
10 | // UNSUPPORTED: x86_64-pc-linux-gnu |
11 | // UNSUPPORTED: x86_64-pc-linux-gnu-LTO |
12 | // UNSUPPORTED: s390x-ibm-linux-gnu |
13 | // UNSUPPORTED: s390x-ibm-linux-gnu-LTO |
14 | |
// Helper deliberately compiled without optimization so that calls to it (and
// values derived from it) cannot be constant-folded; this keeps the trip
// counts and thread-limit expressions below visible to the runtime.
__attribute__((optnone)) int optnone() {
  return 1;
}
16 | |
// Exercises how the AMDGPU plugin reports the chosen number of threads and
// the kernel's MaxFlatWorkGroupSize (printed via LIBOMPTARGET_INFO=16) for
// various combinations of thread_limit/num_threads clauses and the
// ompx_attribute(amdgpu_flat_work_group_size) extension. The interleaved
// FileCheck lines pin the expected launch-bounds output for each kernel.
int main() {
  // Trip count is runtime-only (optnone() is opaque to the optimizer), so the
  // compiler cannot specialize the launch configuration on it.
  int N = optnone() * 4098 * 32;

  // No clauses: combined construct picks the target's default thread count
  // (128 or 256 depending on the device) and the work-group bound matches it.
// DEFAULT: [[NT:(128|256)]] (MaxFlatWorkGroupSize: [[NT]]
#pragma omp target teams distribute parallel for simd
  for (int i = 0; i < N; ++i) {
    optnone();
  }
// DEFAULT: [[NT:(128|256)]] (MaxFlatWorkGroupSize: [[NT]]
#pragma omp target teams distribute parallel for simd
  for (int i = 0; i < N; ++i) {
    optnone();
  }
// DEFAULT: [[NT:(128|256)]] (MaxFlatWorkGroupSize: [[NT]]
#pragma omp target teams distribute parallel for simd
  for (int i = 0; i < N; ++i) {
    optnone();
  }
  // Split (non-combined) form behaves the same as the combined construct.
// DEFAULT: [[NT:(128|256)]] (MaxFlatWorkGroupSize: [[NT]]
#pragma omp target
#pragma omp teams distribute parallel for
  for (int i = 0; i < N; ++i) {
    optnone();
  }
  // Non-constant thread_limit: the runtime launches 42 threads, but the
  // kernel was compiled with the generic upper bound of 1024.
// DEFAULT: 42 (MaxFlatWorkGroupSize: 1024
#pragma omp target thread_limit(optnone() * 42)
#pragma omp teams distribute parallel for
  for (int i = 0; i < N; ++i) {
    optnone();
  }
  // Same runtime thread_limit, but ompx_attribute pins the compiled
  // flat-work-group-size to exactly 42 as well.
// DEFAULT: 42 (MaxFlatWorkGroupSize: 42
#pragma omp target thread_limit(optnone() * 42) ompx_attribute(__attribute__((amdgpu_flat_work_group_size(42, 42))))
#pragma omp teams distribute parallel for
  for (int i = 0; i < N; ++i) {
    optnone();
  }
  // ompx_attribute alone (no thread_limit clause) still caps the bound at 42.
// DEFAULT: 42 (MaxFlatWorkGroupSize: 42
#pragma omp target ompx_attribute(__attribute__((amdgpu_flat_work_group_size(42, 42))))
#pragma omp teams distribute parallel for
  for (int i = 0; i < N; ++i) {
    optnone();
  }
  // Runtime-valued num_threads/thread_limit on the inner construct cannot be
  // folded into the kernel, so the compiled bound stays at the 1024 maximum.
// DEFAULT: MaxFlatWorkGroupSize: 1024
#pragma omp target
#pragma omp teams distribute parallel for num_threads(optnone() * 42)
  for (int i = 0; i < N; ++i) {
    optnone();
  }
// DEFAULT: MaxFlatWorkGroupSize: 1024
#pragma omp target teams distribute parallel for thread_limit(optnone() * 42)
  for (int i = 0; i < N; ++i) {
    optnone();
  }
// DEFAULT: MaxFlatWorkGroupSize: 1024
#pragma omp target teams distribute parallel for num_threads(optnone() * 42)
  for (int i = 0; i < N; ++i) {
    optnone();
  }
  // Compile-time constant clauses: the constant propagates into the kernel's
  // launch bounds, so threads and MaxFlatWorkGroupSize match the literal.
// DEFAULT: 9 (MaxFlatWorkGroupSize: 9
#pragma omp target
#pragma omp teams distribute parallel for num_threads(9)
  for (int i = 0; i < N; ++i) {
    optnone();
  }
// DEFAULT: 4 (MaxFlatWorkGroupSize: 4
#pragma omp target thread_limit(4)
#pragma omp teams distribute parallel for
  for (int i = 0; i < N; ++i) {
    optnone();
  }
// DEFAULT: 4 (MaxFlatWorkGroupSize: 4
#pragma omp target
#pragma omp teams distribute parallel for thread_limit(4)
  for (int i = 0; i < N; ++i) {
    optnone();
  }
// DEFAULT: 9 (MaxFlatWorkGroupSize: 9
#pragma omp target teams distribute parallel for num_threads(9)
  for (int i = 0; i < N; ++i) {
    optnone();
  }
// DEFAULT: 4 (MaxFlatWorkGroupSize: 4
#pragma omp target teams distribute parallel for simd thread_limit(4)
  for (int i = 0; i < N; ++i) {
    optnone();
  }
}
104 | |
105 | |