// RUN: %libomp-cxx-compile
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=0 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=0 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=1 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=3 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=4 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=5 %libomp-run
//
// RUN: %libomp-cxx-compile -DUSE_HIDDEN_HELPERS=1
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=0 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=0 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=1 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=3 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=4 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=5 %libomp-run
18 | |
// This test stresses the task team mechanism by repeatedly running a simple
// increment task with varying numbers of threads and nesting levels.
// The test covers nested serial teams and mixing serial teams with
// normal active teams.
23 | |
24 | #include <assert.h> |
25 | #include <stdio.h> |
26 | #include <stdlib.h> |
27 | #include <omp.h> |
28 | |
// Repetition count: main() passes this to run_ntimes() so that every test
// configuration is executed this many times in a row.
#define NTIMES 5
31 | |
// Creates a single OpenMP task whose body atomically adds one to *a.
// The task may be deferred by the runtime; the callers in this file place a
// barrier between creating the task and inspecting the counter.
void task_inc_a(int *a) {
#pragma omp task
  {
#pragma omp atomic
    *a += 1;
  }
}
40 | |
// Binary-splitting increment task.
//
// Creates a task covering the inclusive range [low, high]. A single-element
// range (low == high) atomically adds one to *a; a wider range is halved and
// each half is handed to a new child task. Once all descendant tasks have run,
// *a has grown by (high - low + 1). An empty range (low > high) adds nothing.
//
//   a    - counter shared by all generated tasks
//   low  - first index of the range (inclusive)
//   high - last index of the range (inclusive)
void task_inc_split_a(int *a, int low, int high) {
#pragma omp task firstprivate(low, high)
  {
    if (low == high) {
#pragma omp atomic
      (*a)++;
    } else if (low < high) {
      // Midpoint written as an offset from low so that low + high is never
      // formed.
      int mid = (high - low) / 2 + low;
      task_inc_split_a(a, low, mid);
      task_inc_split_a(a, mid + 1, high);
    }
  }
}
55 | |
56 | #ifdef USE_HIDDEN_HELPERS |
57 | // Hidden helper tasks force serial regions to create task teams |
58 | void task_inc_a_hidden_helper(int *a) { |
59 | #pragma omp target map(tofrom : a[0]) nowait |
60 | { |
61 | #pragma omp atomic |
62 | (*a)++; |
63 | } |
64 | } |
65 | #else |
66 | // Detached tasks force serial regions to create task teams |
67 | void task_inc_a_detached(int *a, omp_event_handle_t handle) { |
68 | #pragma omp task detach(handle) |
69 | { |
70 | #pragma omp atomic |
71 | (*a)++; |
72 | omp_fulfill_event(handle); |
73 | } |
74 | } |
75 | #endif |
76 | |
// Verifies that the counter *a holds the expected value.
//
// Returns normally when *a == expected. Otherwise prints a FAIL diagnostic
// with both values to stderr and terminates the process with EXIT_FAILURE.
void check_a(int *a, int expected) {
  if (*a != expected) {
    fprintf(stderr,
            "FAIL: a = %d instead of expected = %d. Compile with "
            "-DVERBOSE for more verbose output.\n",
            *a, expected);
    exit(EXIT_FAILURE);
  }
}
86 | |
87 | // Every thread creates a single "increment" task |
88 | void test_tasks(omp_event_handle_t *handles, int expected, int *a) { |
89 | int tid = omp_get_thread_num(); |
90 | |
91 | task_inc_a(a); |
92 | |
93 | #pragma omp barrier |
94 | check_a(a, expected); |
95 | #pragma omp barrier |
96 | check_a(a, expected); |
97 | #pragma omp barrier |
98 | |
99 | #ifdef USE_HIDDEN_HELPERS |
100 | task_inc_a_hidden_helper(a); |
101 | #else |
102 | task_inc_a_detached(a, handles[tid]); |
103 | #endif |
104 | |
105 | #pragma omp barrier |
106 | check_a(a, expected: 2 * expected); |
107 | #pragma omp barrier |
108 | task_inc_a(a); |
109 | #pragma omp barrier |
110 | check_a(a, expected: 3 * expected); |
111 | } |
112 | |
113 | // Testing single level of parallelism with increment tasks |
114 | void test_base(int nthreads) { |
115 | #ifdef VERBOSE |
116 | #pragma omp master |
117 | printf(" test_base(%d)\n" , nthreads); |
118 | #endif |
119 | int a = 0; |
120 | omp_event_handle_t *handles; |
121 | handles = (omp_event_handle_t *)malloc(sizeof(omp_event_handle_t) * nthreads); |
122 | #pragma omp parallel num_threads(nthreads) shared(a) |
123 | { test_tasks(handles, nthreads, &a); } |
124 | free(handles); |
125 | } |
126 | |
127 | // Testing nested parallel with increment tasks |
128 | // first = nthreads of outer parallel |
129 | // second = nthreads of nested parallel |
130 | void test_nest(int first, int second) { |
131 | #ifdef VERBOSE |
132 | #pragma omp master |
133 | printf(" test_nest(%d, %d)\n" , first, second); |
134 | #endif |
135 | #pragma omp parallel num_threads(first) |
136 | { test_base(nthreads: second); } |
137 | } |
138 | |
139 | // Testing 2-level nested parallels with increment tasks |
140 | // first = nthreads of outer parallel |
141 | // second = nthreads of nested parallel |
142 | // third = nthreads of second nested parallel |
143 | void test_nest2(int first, int second, int third) { |
144 | #ifdef VERBOSE |
145 | #pragma omp master |
146 | printf(" test_nest2(%d, %d, %d)\n" , first, second, third); |
147 | #endif |
148 | #pragma omp parallel num_threads(first) |
149 | { test_nest(first: second, second: third); } |
150 | } |
151 | |
152 | // Testing 3-level nested parallels with increment tasks |
153 | // first = nthreads of outer parallel |
154 | // second = nthreads of nested parallel |
155 | // third = nthreads of second nested parallel |
156 | // fourth = nthreads of third nested parallel |
157 | void test_nest3(int first, int second, int third, int fourth) { |
158 | #ifdef VERBOSE |
159 | #pragma omp master |
160 | printf(" test_nest3(%d, %d, %d, %d)\n" , first, second, third, fourth); |
161 | #endif |
162 | #pragma omp parallel num_threads(first) |
163 | { test_nest2(first: second, second: third, third: fourth); } |
164 | } |
165 | |
166 | // Testing 4-level nested parallels with increment tasks |
167 | // first = nthreads of outer parallel |
168 | // second = nthreads of nested parallel |
169 | // third = nthreads of second nested parallel |
170 | // fourth = nthreads of third nested parallel |
171 | // fifth = nthreads of fourth nested parallel |
172 | void test_nest4(int first, int second, int third, int fourth, int fifth) { |
173 | #ifdef VERBOSE |
174 | #pragma omp master |
175 | printf("test_nest4(%d, %d, %d, %d, %d)\n" , first, second, third, fourth, |
176 | fifth); |
177 | #endif |
178 | #pragma omp parallel num_threads(first) |
179 | { test_nest3(first: second, second: third, third: fourth, fourth: fifth); } |
180 | } |
181 | |
182 | // Single thread starts a binary splitting "increment" task |
183 | // Detached tasks are still single "increment" task |
184 | void test_tasks_split(omp_event_handle_t *handles, int expected, int *a) { |
185 | int tid = omp_get_thread_num(); |
186 | |
187 | #pragma omp single |
188 | task_inc_split_a(a, low: 1, high: expected); // task team A |
189 | |
190 | #pragma omp barrier |
191 | check_a(a, expected); |
192 | #pragma omp barrier |
193 | check_a(a, expected); |
194 | #pragma omp barrier |
195 | |
196 | #ifdef USE_HIDDEN_HELPERS |
197 | task_inc_a_hidden_helper(a); |
198 | #else |
199 | task_inc_a_detached(a, handles[tid]); |
200 | #endif |
201 | |
202 | #pragma omp barrier |
203 | check_a(a, expected: 2 * expected); |
204 | #pragma omp barrier |
205 | #pragma omp single |
206 | task_inc_split_a(a, low: 1, high: expected); // task team B |
207 | #pragma omp barrier |
208 | check_a(a, expected: 3 * expected); |
209 | } |
210 | |
211 | // Testing single level of parallelism with splitting incrementing tasks |
212 | void test_base_split(int nthreads) { |
213 | #ifdef VERBOSE |
214 | #pragma omp master |
215 | printf(" test_base_split(%d)\n" , nthreads); |
216 | #endif |
217 | int a = 0; |
218 | omp_event_handle_t *handles; |
219 | handles = (omp_event_handle_t *)malloc(sizeof(omp_event_handle_t) * nthreads); |
220 | #pragma omp parallel num_threads(nthreads) shared(a) |
221 | { test_tasks_split(handles, nthreads, &a); } |
222 | free(handles); |
223 | } |
224 | |
225 | // Testing nested parallels with splitting tasks |
226 | // first = nthreads of outer parallel |
227 | // second = nthreads of nested parallel |
228 | void test_nest_split(int first, int second) { |
229 | #ifdef VERBOSE |
230 | #pragma omp master |
231 | printf(" test_nest_split(%d, %d)\n" , first, second); |
232 | #endif |
233 | #pragma omp parallel num_threads(first) |
234 | { test_base_split(nthreads: second); } |
235 | } |
236 | |
237 | // Testing doubly nested parallels with splitting tasks |
238 | // first = nthreads of outer parallel |
239 | // second = nthreads of nested parallel |
240 | // third = nthreads of second nested parallel |
241 | void test_nest2_split(int first, int second, int third) { |
242 | #ifdef VERBOSE |
243 | #pragma omp master |
244 | printf("test_nest2_split(%d, %d, %d)\n" , first, second, third); |
245 | #endif |
246 | #pragma omp parallel num_threads(first) |
247 | { test_nest_split(first: second, second: third); } |
248 | } |
249 | |
// Invokes func(args...) n times in sequence, passing the same argument values
// on every call. Nothing is called when n is zero or negative.
template <typename... Args>
void run_ntimes(int n, void (*func)(Args...), Args... args) {
  for (int remaining = n; remaining > 0; --remaining)
    func(args...);
}
256 | |
257 | int main() { |
258 | omp_set_max_active_levels(5); |
259 | |
260 | run_ntimes(NTIMES, func: test_base, args: 4); |
261 | run_ntimes(NTIMES, func: test_base, args: 1); |
262 | run_ntimes(NTIMES, func: test_base, args: 8); |
263 | run_ntimes(NTIMES, func: test_base, args: 2); |
264 | run_ntimes(NTIMES, func: test_base, args: 6); |
265 | run_ntimes(NTIMES, func: test_nest, args: 1, args: 1); |
266 | run_ntimes(NTIMES, func: test_nest, args: 1, args: 5); |
267 | run_ntimes(NTIMES, func: test_nest, args: 2, args: 6); |
268 | run_ntimes(NTIMES, func: test_nest, args: 1, args: 1); |
269 | run_ntimes(NTIMES, func: test_nest, args: 4, args: 3); |
270 | run_ntimes(NTIMES, func: test_nest, args: 3, args: 2); |
271 | run_ntimes(NTIMES, func: test_nest, args: 1, args: 1); |
272 | run_ntimes(NTIMES, func: test_nest2, args: 1, args: 1, args: 2); |
273 | run_ntimes(NTIMES, func: test_nest2, args: 1, args: 2, args: 1); |
274 | run_ntimes(NTIMES, func: test_nest2, args: 2, args: 2, args: 1); |
275 | run_ntimes(NTIMES, func: test_nest2, args: 2, args: 1, args: 1); |
276 | run_ntimes(NTIMES, func: test_nest2, args: 4, args: 2, args: 1); |
277 | run_ntimes(NTIMES, func: test_nest2, args: 4, args: 2, args: 2); |
278 | run_ntimes(NTIMES, func: test_nest2, args: 1, args: 1, args: 1); |
279 | run_ntimes(NTIMES, func: test_nest2, args: 4, args: 2, args: 2); |
280 | run_ntimes(NTIMES, func: test_nest3, args: 1, args: 1, args: 1, args: 1); |
281 | run_ntimes(NTIMES, func: test_nest3, args: 1, args: 2, args: 1, args: 1); |
282 | run_ntimes(NTIMES, func: test_nest3, args: 1, args: 1, args: 2, args: 1); |
283 | run_ntimes(NTIMES, func: test_nest3, args: 1, args: 1, args: 1, args: 2); |
284 | run_ntimes(NTIMES, func: test_nest3, args: 2, args: 1, args: 1, args: 1); |
285 | run_ntimes(NTIMES, func: test_nest4, args: 1, args: 1, args: 1, args: 1, args: 1); |
286 | run_ntimes(NTIMES, func: test_nest4, args: 2, args: 1, args: 1, args: 1, args: 1); |
287 | run_ntimes(NTIMES, func: test_nest4, args: 1, args: 2, args: 1, args: 1, args: 1); |
288 | run_ntimes(NTIMES, func: test_nest4, args: 1, args: 1, args: 2, args: 1, args: 1); |
289 | run_ntimes(NTIMES, func: test_nest4, args: 1, args: 1, args: 1, args: 2, args: 1); |
290 | run_ntimes(NTIMES, func: test_nest4, args: 1, args: 1, args: 1, args: 1, args: 2); |
291 | run_ntimes(NTIMES, func: test_nest4, args: 1, args: 1, args: 1, args: 1, args: 1); |
292 | run_ntimes(NTIMES, func: test_nest4, args: 1, args: 2, args: 1, args: 2, args: 1); |
293 | |
294 | run_ntimes(NTIMES, func: test_base_split, args: 4); |
295 | run_ntimes(NTIMES, func: test_base_split, args: 2); |
296 | |
297 | run_ntimes(NTIMES, func: test_base_split, args: 7); |
298 | |
299 | run_ntimes(NTIMES, func: test_base_split, args: 1); |
300 | run_ntimes(NTIMES, func: test_nest_split, args: 4, args: 2); |
301 | run_ntimes(NTIMES, func: test_nest_split, args: 2, args: 1); |
302 | |
303 | run_ntimes(NTIMES, func: test_nest_split, args: 7, args: 2); |
304 | run_ntimes(NTIMES, func: test_nest_split, args: 1, args: 1); |
305 | run_ntimes(NTIMES, func: test_nest_split, args: 1, args: 4); |
306 | |
307 | run_ntimes(NTIMES, func: test_nest2_split, args: 1, args: 1, args: 2); |
308 | run_ntimes(NTIMES, func: test_nest2_split, args: 1, args: 2, args: 1); |
309 | run_ntimes(NTIMES, func: test_nest2_split, args: 2, args: 2, args: 1); |
310 | run_ntimes(NTIMES, func: test_nest2_split, args: 2, args: 1, args: 1); |
311 | run_ntimes(NTIMES, func: test_nest2_split, args: 4, args: 2, args: 1); |
312 | run_ntimes(NTIMES, func: test_nest2_split, args: 4, args: 2, args: 2); |
313 | run_ntimes(NTIMES, func: test_nest2_split, args: 1, args: 1, args: 1); |
314 | run_ntimes(NTIMES, func: test_nest2_split, args: 4, args: 2, args: 2); |
315 | |
316 | printf(format: "PASS\n" ); |
317 | return EXIT_SUCCESS; |
318 | } |
319 | |