1// RUN: %libomp-compile-and-run
2
/* Test for guided scheduling
 * Ensure that threads first pick up chunks in an interleaved fashion,
 * then check that the chunk sizes decrease to a stable value.
 * Modified by Chunhua Liao
 * For example, 100 iterations on 2 threads, chunksize 7:
 * one line for each dispatch; 0/1 is the thread id and the trailing
 * number is the size of that chunk
 * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  24
 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1  18
 * 0 0 0 0 0 0 0 0 0 0 0 0 0 0  14
 * 1 1 1 1 1 1 1 1 1 1  10
 * 0 0 0 0 0 0 0 0  8
 * 1 1 1 1 1 1 1  7
 * 0 0 0 0 0 0 0  7
 * 1 1 1 1 1 1 1  7
 * 0 0 0 0 0  5
*/
19#include <stdio.h>
20#include <stdlib.h>
21#include "omp_testsuite.h"
22#include "omp_my_sleep.h"
23
/* Number of iterations of the guided loop; also the number of thread ids
 * recorded by the test. */
#define CFSMAX_SIZE 1000
/* Upper bound on the sleep time accumulated by one iteration's wait loop. */
#define MAX_TIME 0.005

/* Duration handed to each my_sleep() call and added to the wait-loop counter.
 * Any earlier definition (presumably from one of the included test headers)
 * is replaced. The macro is defined even when no earlier definition exists,
 * so this file does not depend on one being present. */
#ifdef SLEEPTIME
#undef SLEEPTIME
#endif
#define SLEEPTIME 0.0001
31
32int test_omp_for_schedule_guided()
33{
34 int * tids;
35 int * chunksizes;
36 int notout;
37 int maxiter;
38 int threads;
39 int i;
40 int result;
41
42 tids = (int *) malloc (size: sizeof (int) * (CFSMAX_SIZE + 1));
43 maxiter = 0;
44 result = 1;
45 notout = 1;
46
47 /* Testing if enough threads are available for this check. */
48 #pragma omp parallel
49 {
50 #pragma omp single
51 {
52 threads = omp_get_num_threads();
53 }
54 }
55
56 /* ensure there are at least two threads */
57 if (threads < 2) {
58 omp_set_num_threads(2);
59 threads = 2;
60 }
61
62 /* Now the real parallel work:
63 * Each thread will start immediately with the first chunk.
64 */
65 #pragma omp parallel shared(tids,maxiter)
66 { /* begin of parallel */
67 double count;
68 int tid;
69 int j;
70
71 tid = omp_get_thread_num ();
72
73 #pragma omp for nowait schedule(guided)
74 for(j = 0; j < CFSMAX_SIZE; ++j) {
75 count = 0.;
76 #pragma omp flush(maxiter)
77 if (j > maxiter) {
78 #pragma omp critical
79 {
80 maxiter = j;
81 }
82 }
83 /*printf ("thread %d sleeping\n", tid);*/
84 #pragma omp flush(maxiter,notout)
85 while (notout && (count < MAX_TIME) && (maxiter == j)) {
86 #pragma omp flush(maxiter,notout)
87 my_sleep (SLEEPTIME);
88 count += SLEEPTIME;
89#ifdef VERBOSE
90 printf(".");
91#endif
92 }
93#ifdef VERBOSE
94 if (count > 0.) printf(" waited %lf s\n", count);
95#endif
96 /*printf ("thread %d awake\n", tid);*/
97 tids[j] = tid;
98#ifdef VERBOSE
99 printf("%d finished by %d\n",j,tid);
100#endif
101 } /* end of for */
102 notout = 0;
103 #pragma omp flush(maxiter,notout)
104 } /* end of parallel */
105
106 /*******************************************************
107 * evaluation of the values *
108 *******************************************************/
109 {
110 int determined_chunksize = 1;
111 int last_threadnr = tids[0];
112 int global_chunknr = 0;
113 int openwork = CFSMAX_SIZE;
114 int expected_chunk_size;
115 int* local_chunknr = (int*)malloc(size: threads * sizeof(int));
116 double c = 1;
117
118 for (i = 0; i < threads; i++)
119 local_chunknr[i] = 0;
120
121 tids[CFSMAX_SIZE] = -1;
122
123 /*
124 * determine the number of global chunks
125 */
126 // fprintf(stderr,"# global_chunknr thread local_chunknr chunksize\n");
127 for(i = 1; i <= CFSMAX_SIZE; ++i) {
128 if (last_threadnr==tids[i]) {
129 determined_chunksize++;
130 } else {
131 /* fprintf(stderr, "%d\t%d\t%d\t%d\n", global_chunknr,
132 last_threadnr, local_chunknr[last_threadnr], m); */
133 global_chunknr++;
134 local_chunknr[last_threadnr]++;
135 last_threadnr = tids[i];
136 determined_chunksize = 1;
137 }
138 }
139 /* now allocate the memory for saving the sizes of the global chunks */
140 chunksizes = (int*)malloc(size: global_chunknr * sizeof(int));
141
142 /*
143 * Evaluate the sizes of the global chunks
144 */
145 global_chunknr = 0;
146 determined_chunksize = 1;
147 last_threadnr = tids[0];
148 for (i = 1; i <= CFSMAX_SIZE; ++i) {
149 /* If the threadnumber was the same as before increase the
150 * detected chunksize for this chunk otherwise set the detected
151 * chunksize again to one and save the number of the next
152 * thread in last_threadnr.
153 */
154 if (last_threadnr == tids[i]) {
155 determined_chunksize++;
156 } else {
157 chunksizes[global_chunknr] = determined_chunksize;
158 global_chunknr++;
159 local_chunknr[last_threadnr]++;
160 last_threadnr = tids[i];
161 determined_chunksize = 1;
162 }
163 }
164
165#ifdef VERBOSE
166 fprintf(stderr, "found\texpected\tconstant\n");
167#endif
168
169 /* identify the constant c for the exponential
170 decrease of the chunksize */
171 expected_chunk_size = openwork / threads;
172 c = (double) chunksizes[0] / expected_chunk_size;
173
174 for (i = 0; i < global_chunknr; i++) {
175 /* calculate the new expected chunksize */
176 if (expected_chunk_size > 1)
177 expected_chunk_size = c * openwork / threads;
178#ifdef VERBOSE
179 fprintf(stderr, "%8d\t%8d\t%lf\n", chunksizes[i],
180 expected_chunk_size, c * chunksizes[i]/expected_chunk_size);
181#endif
182 /* check if chunksize is inside the rounding errors */
183 if (abs (x: chunksizes[i] - expected_chunk_size) >= 2) {
184 result = 0;
185#ifndef VERBOSE
186 fprintf(stderr, format: "Chunksize differed from expected "
187 "value: %d instead of %d\n", chunksizes[i],
188 expected_chunk_size);
189 return 0;
190#endif
191 } /* end if */
192
193#ifndef VERBOSE
194 if (expected_chunk_size - chunksizes[i] < 0)
195 fprintf(stderr, format: "Chunksize did not decrease: %d"
196 " instead of %d\n", chunksizes[i],expected_chunk_size);
197#endif
198
199 /* calculating the remaining amount of work */
200 openwork -= chunksizes[i];
201 }
202 }
203 return result;
204}
205
206int main()
207{
208 int i;
209 int num_failed=0;
210
211 for(i = 0; i < REPETITIONS; i++) {
212 if(!test_omp_for_schedule_guided()) {
213 num_failed++;
214 }
215 }
216 return num_failed;
217}
218

/* source code of openmp/runtime/test/worksharing/for/omp_for_schedule_guided.c */