1// RUN: %libomp-compile
2// RUN: env OMP_SCHEDULE=guided %libomp-run
3// RUN: env OMP_SCHEDULE=guided,1 %libomp-run 1
4// RUN: env OMP_SCHEDULE=guided,2 %libomp-run 2
5// RUN: env OMP_SCHEDULE=dynamic %libomp-run
6// RUN: env OMP_SCHEDULE=dynamic,1 %libomp-run 1
7// RUN: env OMP_SCHEDULE=dynamic,2 %libomp-run 2
8// RUN: env OMP_SCHEDULE=auto %libomp-run
9
// The test checks schedule(simd:runtime)
// in combination with OMP_SCHEDULE=guided[,chunk], dynamic[,chunk] and auto.
12#include <stdio.h>
13#include <stdlib.h>
14#include <omp.h>
15
// Portability shims for sleeping and for setting environment variables.
// Neither macro is referenced elsewhere in this file.
//
// delay()      - short pause: Sleep(1) on Windows, usleep(10) elsewhere.
// seten(a,b,c) - set environment variable a to value b.  The third argument
//                is forwarded to setenv() on non-Windows builds and is
//                dropped on Windows, where _putenv_s() takes no such flag.
//
// The expansions deliberately carry no trailing semicolon so that
// `if (x) delay(); else ...` parses as a single statement.
#if defined(WIN32) || defined(_WIN32)
#include <windows.h>
#define delay() Sleep(1)
#define seten(a,b,c) _putenv_s((a),(b))
#else
#include <unistd.h>
#define delay() usleep(10)
#define seten(a,b,c) setenv((a),(b),(c))
#endif
25
// Test parameters.
enum {
  UBOUND = 100, // Upper bound of the loop that main() hands to run_loop().
  SIMD_LEN = 4  // Passed to the dispatcher as the chunk argument; run_loop()
                // also multiplies the requested chunk size by it.
};

// Number of failed checks; main() reports failure when it is non-zero.
int err = 0;
29
30// ---------------------------------------------------------------------------
31// Various definitions copied from OpenMP RTL.
// Schedule kinds passed to __kmpc_dispatch_init_*().  The numeric values are
// copied from the OpenMP runtime and must stay in sync with it.  Only
// kmp_sch_runtime_simd is used by this test.
enum sched {
  kmp_sch_static_balanced_chunked = 45,
  kmp_sch_guided_simd             = 46,
  kmp_sch_runtime_simd            = 47,
};
// Short integer aliases used by the copied runtime declarations and by the
// iteration counters in run_loop().
typedef unsigned int u32;
typedef long long int i64;
typedef unsigned long long int u64;
// Descriptor handed as the first argument to every __kmpc_* entry point
// declared below.  The layout is copied from the OpenMP runtime, so member
// order and types must not change.
typedef struct {
  int reserved_1;
  int flags;
  int reserved_2;
  int reserved_3;
  char *psource; // Semicolon-separated string; see the `loc` initializer.
} id;
47
48#ifdef __cplusplus
49extern "C" {
50#endif
51 int __kmpc_global_thread_num(id*);
52 void __kmpc_barrier(id*, int gtid);
53 void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
54 void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
55 int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
56 int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
57#ifdef __cplusplus
58} // extern "C"
59#endif
60// End of definitions copied from OpenMP RTL.
61// ---------------------------------------------------------------------------
62static id loc = {.reserved_1: 0, .flags: 2, .reserved_2: 0, .reserved_3: 0, .psource: ";file;func;0;0;;"};
63
64// ---------------------------------------------------------------------------
65void
66run_loop(
67 int loop_lb, // Loop lower bound.
68 int loop_ub, // Loop upper bound.
69 int loop_st, // Loop stride.
70 int lchunk
71) {
72 static int volatile loop_sync = 0;
73 int lb; // Chunk lower bound.
74 int ub; // Chunk upper bound.
75 int st; // Chunk stride.
76 int rc;
77 int nthreads = omp_get_num_threads();
78 int tid = omp_get_thread_num();
79 int gtid = __kmpc_global_thread_num(&loc);
80 int last;
81 int tc = (loop_ub - loop_lb) / loop_st + 1;
82 int ch;
83 int no_chunk = 0;
84 if (lchunk == 0) {
85 no_chunk = 1;
86 lchunk = 1;
87 }
88 ch = lchunk * SIMD_LEN;
89#if _DEBUG > 1
90 printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
91 gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
92#endif
93 // Don't test degenerate cases that should have been discovered by codegen.
94 if (loop_st == 0)
95 return;
96 if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
97 return;
98 __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
99 loop_lb, loop_ub, loop_st, SIMD_LEN);
100 {
101 // Let the master thread handle the chunks alone.
102 int chunk; // No of current chunk.
103 int last_ub; // Upper bound of the last processed chunk.
104 u64 cur; // Number of interations in current chunk.
105 u64 max; // Max allowed iterations for current chunk.
106 int undersized = 0;
107 last_ub = loop_ub;
108 chunk = 0;
109 max = (loop_ub - loop_lb) / loop_st + 1;
110 // The first chunk can consume all iterations.
111 while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
112 ++ chunk;
113#if _DEBUG
114 printf(format: "th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
115 tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
116#endif
117 // Check if previous chunk (it is not the final chunk) is undersized.
118 if (undersized)
119 printf(format: "Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
120 if (loop_st > 0) {
121 if (!(ub <= loop_ub))
122 printf(format: "Error with ub %d, %d, ch %d, err %d\n",
123 (int)ub, (int)loop_ub, chunk, ++err);
124 if (!(lb <= ub))
125 printf(format: "Error with bounds %d, %d, %d, err %d\n",
126 (int)lb, (int)ub, chunk, ++err);
127 } else {
128 if (!(ub >= loop_ub))
129 printf(format: "Error with ub %d, %d, %d, err %d\n",
130 (int)ub, (int)loop_ub, chunk, ++err);
131 if (!(lb >= ub))
132 printf(format: "Error with bounds %d, %d, %d, err %d\n",
133 (int)lb, (int)ub, chunk, ++err);
134 }; // if
135 // Stride should not change.
136 if (!(st == loop_st))
137 printf(format: "Error with st %d, %d, ch %d, err %d\n",
138 (int)st, (int)loop_st, chunk, ++err);
139 cur = ( ub - lb ) / loop_st + 1;
140 // Guided scheduling uses FP computations, so current chunk may
141 // be a bit bigger (+1) than allowed maximum.
142 if (!( cur <= max + 1))
143 printf(format: "Error with iter %llu, %llu, err %d\n", cur, max, ++err);
144 // Update maximum for the next chunk.
145 if (!last && cur % ch)
146 printf(format: "Error with chunk %d, %d, ch %d, tid %d, err %d\n",
147 chunk, (int)cur, ch, tid, ++err);
148 if (last && !no_chunk && cur > ch && nthreads > 1)
149 printf(format: "Error: too big last chunk %d (%d), tid %d, err %d\n",
150 (int)cur, ch, tid, ++err);
151 if (cur < max)
152 max = cur;
153 last_ub = ub;
154 undersized = (cur < ch);
155#if _DEBUG > 1
156 if (last)
157 printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
158 undersized,cur,ch,tid,ub,lb,loop_st);
159#endif
160 } // while
161 // Must have the right last iteration index.
162 if (loop_st > 0) {
163 if (!(last_ub <= loop_ub))
164 printf(format: "Error with last1 %d, %d, ch %d, err %d\n",
165 (int)last_ub, (int)loop_ub, chunk, ++err);
166 if (last && !(last_ub + loop_st > loop_ub))
167 printf(format: "Error with last2 %d, %d, %d, ch %d, err %d\n",
168 (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
169 } else {
170 if (!(last_ub >= loop_ub))
171 printf(format: "Error with last1 %d, %d, ch %d, err %d\n",
172 (int)last_ub, (int)loop_ub, chunk, ++err);
173 if (last && !(last_ub + loop_st < loop_ub))
174 printf(format: "Error with last2 %d, %d, %d, ch %d, err %d\n",
175 (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
176 } // if
177 }
178 __kmpc_barrier(&loc, gtid);
179} // run_loop
180
181int main(int argc, char *argv[])
182{
183 int chunk = 0;
184 if (argc > 1) {
185 // expect chunk size as a parameter
186 chunk = atoi(nptr: argv[1]);
187 }
188#pragma omp parallel //num_threads(num_th)
189 run_loop(loop_lb: 0, UBOUND, loop_st: 1, lchunk: chunk);
190 if (err) {
191 printf(format: "failed, err = %d\n", err);
192 return 1;
193 } else {
194 printf(format: "passed\n");
195 return 0;
196 }
197}
198

// Source: openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c