#include <omp.h>
#include <malloc.h>
#include <stdio.h>
#include <memory.h>
#include <stdlib.h>
5
// Per-loop-level type aliases. Level 0 is the outermost loop, level 2 the
// innermost. All of them currently map to LOOP_TYPES, which this file never
// defines: it has to be provided by whoever includes this file.
//   LOOP_IV_TYPEn - type of the induction variable of loop n
//   LOOP_TYPEn    - type of the bound variables of loop n
//   LOOP_STYPEn   - type of the step/coefficient variables of loop n
#define LOOP_IV_TYPE0 LOOP_TYPES
#define LOOP_TYPE0 LOOP_TYPES
#define LOOP_STYPE0 LOOP_TYPES

#define LOOP_IV_TYPE1 LOOP_TYPES
#define LOOP_TYPE1 LOOP_TYPES
#define LOOP_STYPE1 LOOP_TYPES

#define LOOP_IV_TYPE2 LOOP_TYPES
#define LOOP_TYPE2 LOOP_TYPES
#define LOOP_STYPE2 LOOP_TYPES

// Upper limit on the number of threads test() requests for the parallel region.
#define MAX_THREADS 256
19
// Diagnostic output helper. With VERBOSE defined it forwards to printf;
// otherwise it expands to a no-op *expression* (rather than to nothing) so a
// call remains a well-formed statement or sub-expression wherever it is used.
#if defined VERBOSE
#define PRINTF(...) printf(__VA_ARGS__)
#else
#define PRINTF(...) ((void)0)
#endif
25
26LOOP_TYPE0 iLB, iUB;
27LOOP_TYPE1 jA0, jB0;
28LOOP_TYPE2 kA0, kB0;
29
30LOOP_STYPE0 iStep;
31LOOP_STYPE1 jA1, jB1, jStep;
32LOOP_STYPE2 kA1, kB1, kStep;
33
// Comparison operator selection, one operator per loop level.
// The supported relations are <=, <, >= and > (!= follows a different pattern
// and is not handled here). For level n, defining exactly one of LOOP_LEn,
// LOOP_LTn, LOOP_GEn or LOOP_GTn before including this file makes COMPAREn
// expand to the matching operator; if none is defined, COMPAREn is left
// untouched. Having the LOOP_xxn names around is also helpful when building
// the calls of the test from main.

#if defined LOOP_LE0
#define COMPARE0 <=
#elif defined LOOP_LT0
#define COMPARE0 <
#elif defined LOOP_GE0
#define COMPARE0 >=
#elif defined LOOP_GT0
#define COMPARE0 >
#endif

#if defined LOOP_LE1
#define COMPARE1 <=
#elif defined LOOP_LT1
#define COMPARE1 <
#elif defined LOOP_GE1
#define COMPARE1 >=
#elif defined LOOP_GT1
#define COMPARE1 >
#endif

#if defined LOOP_LE2
#define COMPARE2 <=
#elif defined LOOP_LT2
#define COMPARE2 <
#elif defined LOOP_GE2
#define COMPARE2 >=
#elif defined LOOP_GT2
#define COMPARE2 >
#endif
67
68typedef struct {
69 LOOP_IV_TYPE0 i;
70 LOOP_IV_TYPE1 j;
71 LOOP_IV_TYPE2 k;
72} spaceType;
73
74spaceType *AllocSpace(unsigned size) {
75
76 spaceType *p = (spaceType *)malloc(size: size * sizeof(spaceType));
77 memset(s: p, c: 0, n: size * sizeof(spaceType));
78 return p;
79}
80
81void FreeSpace(spaceType *space) { free(ptr: space); }
82
83// record an iteration
84void Set(spaceType *space, unsigned count, unsigned trueCount, LOOP_IV_TYPE0 i,
85 LOOP_IV_TYPE1 j, LOOP_IV_TYPE0 k) {
86 if (count > trueCount) {
87 // number of iterations exceeded
88 // will be reported with checks
89 return;
90 }
91 space[count - 1].i = i;
92 space[count - 1].j = j;
93 space[count - 1].k = k;
94}
95int test() {
96 int pass = 1;
97 LOOP_IV_TYPE0 i;
98 LOOP_IV_TYPE1 j;
99 LOOP_IV_TYPE2 k;
100
101 spaceType *openmpSpace;
102 spaceType *scalarSpace;
103
104 unsigned trueCount = 0;
105 unsigned openmpCount = 0;
106 unsigned scalarCount = 0;
107 unsigned uselessThreadsOpenMP = 0;
108 unsigned usefulThreadsOpenMP = 0;
109
110 // Use half of the available threads/logical processors.
111 unsigned num_threads = omp_get_max_threads() / 2;
112
113 // Make sure num_threads is not 0 after the division in case
114 // omp_get_max_threads() returns 1.
115 if (num_threads == 0)
116 num_threads = 1;
117
118 if (num_threads > MAX_THREADS)
119 num_threads = MAX_THREADS;
120
121 unsigned long *chunkSizesOpenmp =
122 (unsigned long *)malloc(size: sizeof(unsigned long) * num_threads);
123 memset(s: chunkSizesOpenmp, c: 0, n: sizeof(unsigned long) * num_threads);
124
125 // count iterations and allocate space
126 LOOP { ++trueCount; }
127
128 openmpSpace = AllocSpace(size: trueCount);
129 scalarSpace = AllocSpace(size: trueCount);
130
131 // fill the scalar (compare) space
132 LOOP {
133 ++scalarCount;
134 Set(space: scalarSpace, count: scalarCount, trueCount, i, j, k);
135 }
136
137 // test run body:
138 // perform and record OpenMP iterations and thread use
139#pragma omp parallel num_threads(num_threads)
140 {
141 unsigned gtid = omp_get_thread_num();
142#pragma omp for collapse(3) private(i, j, k)
143 LOOP {
144 unsigned count;
145#pragma omp atomic update
146 ++chunkSizesOpenmp[gtid];
147#pragma omp atomic capture
148 count = ++openmpCount;
149 Set(space: openmpSpace, count, trueCount, i, j, k);
150 }
151 }
152
153 // check for the right number of iterations processed
154 // (only need to check for less, greater is checked when recording)
155 if (openmpCount < trueCount) {
156 PRINTF("OpenMP FAILURE: Openmp processed fewer iterations: %d vs %d\n",
157 openmpCount, trueCount);
158 pass = 0;
159 } else if (openmpCount > trueCount) {
160 PRINTF("OpenMP FAILURE: Openmp processed more iterations: %d vs %d\n",
161 openmpCount, trueCount);
162 pass = 0;
163 }
164
165 // check openMP for iteration correctnes against scalar
166 for (unsigned i = 0; i < trueCount; i++) {
167 unsigned j;
168 for (j = 0; j < openmpCount; j++) {
169 if ((scalarSpace[i].i == openmpSpace[j].i) &&
170 (scalarSpace[i].j == openmpSpace[j].j) &&
171 (scalarSpace[i].k == openmpSpace[j].k)) {
172 break;
173 }
174 }
175 if (j == openmpCount) {
176 PRINTF("OpenMP FAILURE: (%d %d %d) not processed\n", scalarSpace[i].i,
177 scalarSpace[i].j, scalarSpace[i].k);
178 pass = 0;
179 }
180 }
181
182 // check for efficient thread use
183 for (unsigned i = 0; i < num_threads; ++i) {
184 if (chunkSizesOpenmp[i] == 0) {
185 ++uselessThreadsOpenMP;
186 }
187 }
188
189 // a check to see if at least more than one thread was used (weakish)
190 if ((uselessThreadsOpenMP == num_threads - 1) && (trueCount > 1)) {
191 PRINTF("OpenMP FAILURE: threads are not used\n");
192 pass = 0;
193 }
194
195#if 0
196 // a check to see if the load was spread more or less evenly so that
197 // when there was more work than threads each one got at least something
198 // (stronger, but may currently fail for a general collapse case)
199 if ((trueCount >= num_threads) && (uselessThreadsOpenMP > 0)) {
200 PRINTF("OpenMP FAILURE: %d threads not used with %d iterations\n",
201 uselessThreadsOpenMP, openmpCount);
202 pass = 0;
203 }
204#endif
205
206 // clean up space
207 FreeSpace(space: openmpSpace);
208 FreeSpace(space: scalarSpace);
209 free(ptr: chunkSizesOpenmp);
210 return pass;
211}
212

// Source: openmp/runtime/test/worksharing/for/collapse_test.inc