1 | // RUN: %libomp-compile && env OMP_NUM_THREADS=2,2,2,2,2 %libomp-run |
2 | #include <stdio.h> |
3 | #include "omp_testsuite.h" |
4 | |
5 | // When compiler supports num_threads clause list format, remove the following |
6 | // and use num_threads clause directly |
7 | #if defined(__cplusplus) |
8 | extern "C" { |
9 | #endif |
10 | |
// Runtime entry points declared by hand because the test calls them directly
// in place of the list form of the num_threads clause.

// Result is passed as the gtid argument of __kmpc_push_num_threads_list below.
// This file always passes NULL for loc.
int __kmpc_global_thread_num(void *loc);
// Called immediately before a parallel region to request the team sizes in
// list (length entries) for that region and the regions nested inside it.
// NOTE(review): semantics inferred from the call sites and expectations in
// this test -- confirm against the runtime sources.
void __kmpc_push_num_threads_list(void *loc, int gtid, unsigned length,
                                  int *list);
14 | |
15 | #if defined(__cplusplus) |
16 | } |
17 | #endif |
18 | |
// Checks the team size seen at each nesting level of nested parallel regions
// when the size is requested through the OMP_NUM_THREADS list, a single-value
// num_threads clause, or a list pushed via __kmpc_push_num_threads_list.
//
// The test expects to be run with OMP_NUM_THREADS=2,2,2,2,2 (see the RUN
// line), so any level not covered by a clause or pushed list should see 2
// threads.
//
// Every mismatch adds one to a reduction counter.  Returns 1 when no mismatch
// was observed and 0 otherwise.
int test_omp_parallel_num_threads_list() {
  int num_failed = 0;

// Initially, 5 levels specified via OMP_NUM_THREADS with 2 threads per level
// Check top 3 levels
#pragma omp parallel reduction(+ : num_failed) // 1st level
  {
#pragma omp single
    num_failed += (omp_get_num_threads() != 2);
#pragma omp parallel reduction(+ : num_failed) // 2nd level
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 2);
#pragma omp parallel reduction(+ : num_failed) // 3rd level
      {
#pragma omp single
        num_failed += (omp_get_num_threads() != 2);
      } // end 3rd level parallel
    } // end 2nd level parallel
  } // end 1st level parallel

// Make sure that basic single element num_threads clause works
#pragma omp parallel reduction(+ : num_failed) num_threads(4) // 1st level
  {
#pragma omp single
    num_failed += (omp_get_num_threads() != 4);
#pragma omp parallel reduction(+ : num_failed) // 2nd level
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 2); // Unaffected
#pragma omp parallel reduction(+ : num_failed) // 3rd level
      {
#pragma omp single
        num_failed += (omp_get_num_threads() != 2); // Unaffected
      } // end 3rd level parallel
    } // end 2nd level parallel
  } // end 1st level parallel

// Check that basic single element num_threads clause works on second level
#pragma omp parallel reduction(+ : num_failed) // 1st level
  {
#pragma omp single
    num_failed += (omp_get_num_threads() != 2); // Unaffected
#pragma omp parallel reduction(+ : num_failed) num_threads(4) // 2nd level
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 4);
#pragma omp parallel reduction(+ : num_failed) // 3rd level
      {
#pragma omp single
        num_failed += (omp_get_num_threads() != 2); // Unaffected
      } // end 3rd level parallel
    } // end 2nd level parallel
  } // end 1st level parallel

  // Try a short list. It should completely overwrite the old settings.
  // We need to use the compiler interface for now.
  int threads[2] = {3, 3};
  __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
                               threads);
#pragma omp parallel reduction(+ : num_failed) // num_threads(3,3) // 1st level
  {
#pragma omp single
    num_failed += (omp_get_num_threads() != 3);
#pragma omp parallel reduction(+ : num_failed) // 2nd level
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 3);
#pragma omp parallel reduction(+ : num_failed) // 3rd level
      {
// NOTE: should just keep using last element in list, to nesting depth
#pragma omp single
        num_failed += (omp_get_num_threads() != 3);
      } // end 3rd level parallel
    } // end 2nd level parallel
  } // end 1st level parallel

// Similar, but at a lower level.
#pragma omp parallel reduction(+ : num_failed) // 1st level
  {
#pragma omp single
    num_failed += (omp_get_num_threads() != 2); // Unaffected
    // Distinct name so the outer `threads` array is not shadowed.
    int inner_threads[2] = {3, 3};
    __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
                                 inner_threads);
#pragma omp parallel reduction(+ : num_failed) // num_threads(3,3) // 2nd level
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 3);
#pragma omp parallel reduction(+ : num_failed) // 3rd level
      {
// NOTE: just keep using last element in list, to nesting depth
#pragma omp single
        num_failed += (omp_get_num_threads() != 3);
      } // end 3rd level parallel
    } // end 2nd level parallel
// Make sure a second inner parallel is NOT affected by the clause
#pragma omp parallel reduction(+ : num_failed) // 2nd level
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 2); // Unaffected
#pragma omp parallel reduction(+ : num_failed) // 3rd level
      {
// NOTE: the pushed list applied only to the first inner parallel, so this
// level is expected to follow OMP_NUM_THREADS again
#pragma omp single
        num_failed += (omp_get_num_threads() != 2); // Unaffected
      } // end 3rd level parallel
    } // end 2nd level parallel
  } // end 1st level parallel

  // Test lists at multiple levels
  int threads2[2] = {3, 2};
  __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
                               threads2);
#pragma omp parallel reduction(+ : num_failed) // num_threads(3,2) // 1st level
  {
#pragma omp single
    num_failed += (omp_get_num_threads() != 3);
#pragma omp parallel reduction(+ : num_failed) // 2nd level
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 2);
#pragma omp parallel reduction(+ : num_failed) // 3rd level
      {
#pragma omp single
        num_failed += (omp_get_num_threads() != 2);
        int threads3[2] = {3, 1};
        __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
                                     threads3);
#pragma omp parallel reduction(+ : num_failed) // num_threads(3,1) // 4th level
        {
#pragma omp single
          num_failed += (omp_get_num_threads() != 3);
#pragma omp parallel reduction(+ : num_failed) // 5th level
          {
#pragma omp single
            num_failed += (omp_get_num_threads() != 1);
#pragma omp parallel reduction(+ : num_failed) // 6th level
            {
#pragma omp single
              num_failed += (omp_get_num_threads() != 1);
            } // end 6th level parallel
          } // end 5th level parallel
        } // end 4th level parallel
#pragma omp parallel reduction(+ : num_failed) // 4th level
        {
#pragma omp single
          num_failed += (omp_get_num_threads() != 2);
        } // end 4th level parallel
      } // end 3rd level parallel
    } // end 2nd level parallel
#pragma omp parallel reduction(+ : num_failed) // 2nd level
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 2);
#pragma omp parallel reduction(+ : num_failed) // 3rd level
      {
#pragma omp single
        num_failed += (omp_get_num_threads() != 2);
      } // end 3rd level parallel
    } // end 2nd level parallel
  } // end 1st level parallel

// Now we should be back to the way we started.
#pragma omp parallel reduction(+ : num_failed) // 1st level
  {
#pragma omp single
    num_failed += (omp_get_num_threads() != 2);
#pragma omp parallel reduction(+ : num_failed) // 2nd level
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 2);
#pragma omp parallel reduction(+ : num_failed) // 3rd level
      {
#pragma omp single
        num_failed += (omp_get_num_threads() != 2);
      } // end 3rd level parallel
    } // end 2nd level parallel
  } // end 1st level parallel

  return (!num_failed);
}
201 | |
202 | int main() { |
203 | int i; |
204 | int num_failed = 0; |
205 | |
206 | for (i = 0; i < REPETITIONS; i++) { |
207 | if (!test_omp_parallel_num_threads_list()) { |
208 | num_failed++; |
209 | } |
210 | } |
211 | return num_failed; |
212 | } |
213 | |