1// RUN: %libomp-compile-and-run
2
3// The test checks schedule(simd:runtime)
4// in combination with omp_set_schedule()
5#include <stdio.h>
6#include <stdlib.h>
7#include <omp.h>
8
// Portability shims for a short pause and for setting an environment variable.
// The macros expand to a single expression WITHOUT a trailing semicolon, so
// that `delay();` is exactly one statement and can be used safely as the body
// of an unbraced if/else.
#if defined(WIN32) || defined(_WIN32)
#include <windows.h>
// Sleep() takes milliseconds.
#define delay() Sleep(1)
// _putenv_s() has no "overwrite" flag, so the third argument is ignored here.
#define seten(a,b,c) _putenv_s((a),(b))
#else
#include <unistd.h>
// usleep() takes microseconds.
#define delay() usleep(10)
#define seten(a,b,c) setenv((a),(b),(c))
#endif
18
// Value passed as the chunk argument of __kmpc_dispatch_init_4() in run_loop();
// run_loop() also multiplies the requested chunk by it to get the granularity
// that every non-final chunk is checked against.
#define SIMD_LEN 4
// Number of failed checks so far; incremented by run_loop() each time a check
// fails, and main() returns non-zero when it is not 0.
int err = 0;
21
22// ---------------------------------------------------------------------------
23// Various definitions copied from OpenMP RTL.
// Schedule identifiers with the numeric values used by the OpenMP runtime
// (per the "copied from OpenMP RTL" note above). Only kmp_sch_runtime_simd is
// used in this file: run_loop() passes it to __kmpc_dispatch_init_4().
enum sched {
  kmp_sch_static_balanced_chunked = 45,
  kmp_sch_guided_simd = 46,
  kmp_sch_runtime_simd = 47,
};
// Short integer aliases: i64 appears in the *_8 dispatch prototypes below and
// u64 holds iteration counts in run_loop(); u32 is not used in this file.
typedef unsigned u32;
typedef long long i64;
typedef unsigned long long u64;
// Descriptor passed by pointer as the first argument of every __kmpc_* entry
// point declared below. This file creates a single instance, `loc`.
// NOTE(review): the layout is presumably meant to mirror the runtime's own
// location descriptor -- confirm against the RTL headers before changing it.
typedef struct {
  int reserved_1;
  int flags;
  int reserved_2;
  int reserved_3;
  char *psource; // Text descriptor; `loc` sets it to ";file;func;0;0;;".
} id;
39
40#ifdef __cplusplus
41extern "C" {
42#endif
43 int __kmpc_global_thread_num(id*);
44 void __kmpc_barrier(id*, int gtid);
45 void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
46 void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
47 int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
48 int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
49#ifdef __cplusplus
50} // extern "C"
51#endif
52// End of definitions copied from OpenMP RTL.
53// ---------------------------------------------------------------------------
54static id loc = {.reserved_1: 0, .flags: 2, .reserved_2: 0, .reserved_3: 0, .psource: ";file;func;0;0;;"};
55
56// ---------------------------------------------------------------------------
57void
58run_loop(
59 int loop_lb, // Loop lower bound.
60 int loop_ub, // Loop upper bound.
61 int loop_st, // Loop stride.
62 int lchunk
63) {
64 static int volatile loop_sync = 0;
65 int lb; // Chunk lower bound.
66 int ub; // Chunk upper bound.
67 int st; // Chunk stride.
68 int rc;
69 int nthreads = omp_get_num_threads();
70 int tid = omp_get_thread_num();
71 int gtid = __kmpc_global_thread_num(&loc);
72 int last;
73 int tc = (loop_ub - loop_lb) / loop_st + 1;
74 int ch;
75 int no_chunk = 0;
76 if (lchunk == 0) {
77 no_chunk = 1;
78 lchunk = 1;
79 }
80 ch = lchunk * SIMD_LEN;
81#if _DEBUG > 1
82 printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
83 gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
84#endif
85 // Don't test degenerate cases that should have been discovered by codegen.
86 if (loop_st == 0)
87 return;
88 if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
89 return;
90 __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
91 loop_lb, loop_ub, loop_st, SIMD_LEN);
92 {
93 // Let the master thread handle the chunks alone.
94 int chunk; // No of current chunk.
95 int last_ub; // Upper bound of the last processed chunk.
96 u64 cur; // Number of interations in current chunk.
97 u64 max; // Max allowed iterations for current chunk.
98 int undersized = 0;
99 last_ub = loop_ub;
100 chunk = 0;
101 max = (loop_ub - loop_lb) / loop_st + 1;
102 // The first chunk can consume all iterations.
103 while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
104 ++ chunk;
105#if _DEBUG
106 printf(format: "th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
107 tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
108#endif
109 // Check if previous chunk (it is not the final chunk) is undersized.
110 if (undersized)
111 printf(format: "Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
112 if (loop_st > 0) {
113 if (!(ub <= loop_ub))
114 printf(format: "Error with ub %d, %d, ch %d, err %d\n",
115 (int)ub, (int)loop_ub, chunk, ++err);
116 if (!(lb <= ub))
117 printf(format: "Error with bounds %d, %d, %d, err %d\n",
118 (int)lb, (int)ub, chunk, ++err);
119 } else {
120 if (!(ub >= loop_ub))
121 printf(format: "Error with ub %d, %d, %d, err %d\n",
122 (int)ub, (int)loop_ub, chunk, ++err);
123 if (!(lb >= ub))
124 printf(format: "Error with bounds %d, %d, %d, err %d\n",
125 (int)lb, (int)ub, chunk, ++err);
126 }; // if
127 // Stride should not change.
128 if (!(st == loop_st))
129 printf(format: "Error with st %d, %d, ch %d, err %d\n",
130 (int)st, (int)loop_st, chunk, ++err);
131 cur = ( ub - lb ) / loop_st + 1;
132 // Guided scheduling uses FP computations, so current chunk may
133 // be a bit bigger (+1) than allowed maximum.
134 if (!( cur <= max + 1))
135 printf(format: "Error with iter %llu, %llu, err %d\n", cur, max, ++err);
136 // Update maximum for the next chunk.
137 if (last) {
138 if (!no_chunk && cur > ch && nthreads > 1)
139 printf(format: "Error: too big last chunk %d (%d), tid %d, err %d\n",
140 (int)cur, ch, tid, ++err);
141 } else {
142 if (cur % ch)
143 printf(format: "Error with chunk %d, %d, ch %d, tid %d, err %d\n",
144 chunk, (int)cur, ch, tid, ++err);
145 }
146 if (cur < max)
147 max = cur;
148 last_ub = ub;
149 undersized = (cur < ch);
150#if _DEBUG > 1
151 if (last)
152 printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
153 undersized,cur,ch,tid,ub,lb,loop_st);
154#endif
155 } // while
156 // Must have the right last iteration index.
157 if (loop_st > 0) {
158 if (!(last_ub <= loop_ub))
159 printf(format: "Error with last1 %d, %d, ch %d, err %d\n",
160 (int)last_ub, (int)loop_ub, chunk, ++err);
161 if (last && !(last_ub + loop_st > loop_ub))
162 printf(format: "Error with last2 %d, %d, %d, ch %d, err %d\n",
163 (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
164 } else {
165 if (!(last_ub >= loop_ub))
166 printf(format: "Error with last1 %d, %d, ch %d, err %d\n",
167 (int)last_ub, (int)loop_ub, chunk, ++err);
168 if (last && !(last_ub + loop_st < loop_ub))
169 printf(format: "Error with last2 %d, %d, %d, ch %d, err %d\n",
170 (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
171 } // if
172 }
173 __kmpc_barrier(&loc, gtid);
174} // run_loop
175
176int main(int argc, char *argv[])
177{
178 int chunk = 0;
179// static (no chunk)
180 omp_set_schedule(omp_sched_static,0);
181#pragma omp parallel// num_threads(num_th)
182 run_loop(loop_lb: 0, loop_ub: 26, loop_st: 1, lchunk: chunk);
183
184// auto (chunk should be ignorted)
185 omp_set_schedule(omp_sched_auto,0);
186#pragma omp parallel// num_threads(num_th)
187 run_loop(loop_lb: 0, loop_ub: 26, loop_st: 1, lchunk: chunk);
188
189// static,1
190 chunk = 1;
191 omp_set_schedule(omp_sched_static,1);
192#pragma omp parallel// num_threads(num_th)
193 run_loop(loop_lb: 0, loop_ub: 26, loop_st: 1, lchunk: chunk);
194
195// dynamic,1
196 omp_set_schedule(omp_sched_dynamic,1);
197#pragma omp parallel// num_threads(num_th)
198 run_loop(loop_lb: 0, loop_ub: 26, loop_st: 1, lchunk: chunk);
199
200// guided,1
201 omp_set_schedule(omp_sched_guided,1);
202#pragma omp parallel// num_threads(num_th)
203 run_loop(loop_lb: 0, loop_ub: 26, loop_st: 1, lchunk: chunk);
204
205// dynamic,0 - use default chunk size 1
206 omp_set_schedule(omp_sched_dynamic,0);
207#pragma omp parallel// num_threads(num_th)
208 run_loop(loop_lb: 0, loop_ub: 26, loop_st: 1, lchunk: chunk);
209
210// guided,0 - use default chunk size 1
211 omp_set_schedule(omp_sched_guided,0);
212#pragma omp parallel// num_threads(num_th)
213 run_loop(loop_lb: 0, loop_ub: 26, loop_st: 1, lchunk: chunk);
214
215 if (err) {
216 printf(format: "failed, err = %d\n", err);
217 return 1;
218 } else {
219 printf(format: "passed\n");
220 return 0;
221 }
222}
223

// Source: openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c