kmp_sch_simd_runtime_api.c source code [openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c]

1	// RUN: %libomp-compile-and-run
2
3	// The test checks schedule(simd:runtime)
4	// in combination with omp_set_schedule()
5	#include <stdio.h>
6	#include <stdlib.h>
7	#include <omp.h>
8
// Platform shims.
//   delay()      - short sleep: Sleep(1) on Windows, usleep(10) elsewhere.
//                  NOTE: the expansion already carries its own trailing ';'.
//   seten(a,b,c) - set environment variable a to b; the third argument is
//                  forwarded to setenv() and dropped on Windows.
#if defined(WIN32) || defined(_WIN32)
#include <windows.h>
#define delay() Sleep(1);
#define seten(a,b,c) _putenv_s((a),(b))
#else
#include <unistd.h>
#define delay() usleep(10);
#define seten(a,b,c) setenv((a),(b),(c))
#endif
18
// Passed as the last argument of __kmpc_dispatch_init_4() and used to scale
// the chunk size that run_loop() expects.
#define SIMD_LEN 4
// Number of failed checks; incremented by run_loop(), reported by main().
int err = 0;
21
22	// ---------------------------------------------------------------------------
23	// Various definitions copied from OpenMP RTL.
// Schedule kinds passed to __kmpc_dispatch_init_*().
enum sched {
  kmp_sch_static_balanced_chunked = 45,
  kmp_sch_guided_simd = 46,
  kmp_sch_runtime_simd = 47,
};
typedef unsigned u32;
typedef long long i64;
typedef unsigned long long u64;
// Source-location descriptor; its address is the first argument of every
// __kmpc_* call made by this test.
typedef struct {
  int reserved_1;
  int flags;
  int reserved_2;
  int reserved_3;
  char *psource;
} id;
39
40	#ifdef __cplusplus
41	extern "C" {
42	#endif
43	int __kmpc_global_thread_num(id*);
44	void __kmpc_barrier(id, int* gtid);
45	void __kmpc_dispatch_init_4(id, int, enum* sched, int, int, int, int);
46	void __kmpc_dispatch_init_8(id, int, enum* sched, i64, i64, i64, i64);
47	int __kmpc_dispatch_next_4(id, int, void*, void*, void*, void**);
48	int __kmpc_dispatch_next_8(id, int, void*, void*, void*, void**);
49	#ifdef __cplusplus
50	} // extern "C"
51	#endif
52	// End of definitions copied from OpenMP RTL.
53	// ---------------------------------------------------------------------------
54	static id loc = {.reserved_1: `0`, .flags: `2`, .reserved_2: `0`, .reserved_3: `0`, .psource: ";file;func;0;0;;"};
55
56	// ---------------------------------------------------------------------------
57	void
58	run_loop(
59	int loop_lb, // Loop lower bound.
60	int loop_ub, // Loop upper bound.
61	int loop_st, // Loop stride.
62	int lchunk
63	) {
64	static int volatile loop_sync = `0`;
65	int lb; // Chunk lower bound.
66	int ub; // Chunk upper bound.
67	int st; // Chunk stride.
68	int rc;
69	int nthreads = omp_get_num_threads();
70	int tid = omp_get_thread_num();
71	int gtid = __kmpc_global_thread_num(&loc);
72	int last;
73	int tc = (loop_ub - loop_lb) / loop_st + `1`;
74	int ch;
75	int no_chunk = `0`;
76	if (lchunk == `0`) {
77	no_chunk = `1`;
78	lchunk = `1`;
79	}
80	ch = lchunk * SIMD_LEN;
81	#if _DEBUG > 1
82	printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
83	gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
84	#endif
85	// Don't test degenerate cases that should have been discovered by codegen.
86	if (loop_st == `0`)
87	return;
88	if (loop_st > `0` ? loop_lb > loop_ub : loop_lb < loop_ub)
89	return;
90	__kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
91	loop_lb, loop_ub, loop_st, SIMD_LEN);
92	{
93	// Let the master thread handle the chunks alone.
94	int chunk; // No of current chunk.
95	int last_ub; // Upper bound of the last processed chunk.
96	u64 cur; // Number of interations in current chunk.
97	u64 max; // Max allowed iterations for current chunk.
98	int undersized = `0`;
99	last_ub = loop_ub;
100	chunk = `0`;
101	max = (loop_ub - loop_lb) / loop_st + `1`;
102	// The first chunk can consume all iterations.
103	while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
104	++ chunk;
105	#if _DEBUG
106	printf(format: "th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
107	tid, chunk, (int)lb, (int)ub, (int)(ub-lb+`1`));
108	#endif
109	// Check if previous chunk (it is not the final chunk) is undersized.
110	if (undersized)
111	printf(format: "Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
112	if (loop_st > `0`) {
113	if (!(ub <= loop_ub))
114	printf(format: "Error with ub %d, %d, ch %d, err %d\n",
115	(int)ub, (int)loop_ub, chunk, ++err);
116	if (!(lb <= ub))
117	printf(format: "Error with bounds %d, %d, %d, err %d\n",
118	(int)lb, (int)ub, chunk, ++err);
119	} else {
120	if (!(ub >= loop_ub))
121	printf(format: "Error with ub %d, %d, %d, err %d\n",
122	(int)ub, (int)loop_ub, chunk, ++err);
123	if (!(lb >= ub))
124	printf(format: "Error with bounds %d, %d, %d, err %d\n",
125	(int)lb, (int)ub, chunk, ++err);
126	}; // if
127	// Stride should not change.
128	if (!(st == loop_st))
129	printf(format: "Error with st %d, %d, ch %d, err %d\n",
130	(int)st, (int)loop_st, chunk, ++err);
131	cur = ( ub - lb ) / loop_st + `1`;
132	// Guided scheduling uses FP computations, so current chunk may
133	// be a bit bigger (+1) than allowed maximum.
134	if (!( cur <= max + `1`))
135	printf(format: "Error with iter %llu, %llu, err %d\n", cur, max, ++err);
136	// Update maximum for the next chunk.
137	if (last) {
138	if (!no_chunk && cur > ch && nthreads > `1`)
139	printf(format: "Error: too big last chunk %d (%d), tid %d, err %d\n",
140	(int)cur, ch, tid, ++err);
141	} else {
142	if (cur % ch)
143	printf(format: "Error with chunk %d, %d, ch %d, tid %d, err %d\n",
144	chunk, (int)cur, ch, tid, ++err);
145	}
146	if (cur < max)
147	max = cur;
148	last_ub = ub;
149	undersized = (cur < ch);
150	#if _DEBUG > 1
151	if (last)
152	printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
153	undersized,cur,ch,tid,ub,lb,loop_st);
154	#endif
155	} // while
156	// Must have the right last iteration index.
157	if (loop_st > `0`) {
158	if (!(last_ub <= loop_ub))
159	printf(format: "Error with last1 %d, %d, ch %d, err %d\n",
160	(int)last_ub, (int)loop_ub, chunk, ++err);
161	if (last && !(last_ub + loop_st > loop_ub))
162	printf(format: "Error with last2 %d, %d, %d, ch %d, err %d\n",
163	(int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
164	} else {
165	if (!(last_ub >= loop_ub))
166	printf(format: "Error with last1 %d, %d, ch %d, err %d\n",
167	(int)last_ub, (int)loop_ub, chunk, ++err);
168	if (last && !(last_ub + loop_st < loop_ub))
169	printf(format: "Error with last2 %d, %d, %d, ch %d, err %d\n",
170	(int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
171	} // if
172	}
173	__kmpc_barrier(&loc, gtid);
174	} // run_loop
175
176	int main(int argc, char *argv[])
177	{
178	int chunk = `0`;
179	// static (no chunk)
180	omp_set_schedule(omp_sched_static,`0`);
181	#pragma omp parallel// num_threads(num_th)
182	run_loop(loop_lb: `0`, loop_ub: `26`, loop_st: `1`, lchunk: chunk);
183
184	// auto (chunk should be ignorted)
185	omp_set_schedule(omp_sched_auto,`0`);
186	#pragma omp parallel// num_threads(num_th)
187	run_loop(loop_lb: `0`, loop_ub: `26`, loop_st: `1`, lchunk: chunk);
188
189	// static,1
190	chunk = `1`;
191	omp_set_schedule(omp_sched_static,`1`);
192	#pragma omp parallel// num_threads(num_th)
193	run_loop(loop_lb: `0`, loop_ub: `26`, loop_st: `1`, lchunk: chunk);
194
195	// dynamic,1
196	omp_set_schedule(omp_sched_dynamic,`1`);
197	#pragma omp parallel// num_threads(num_th)
198	run_loop(loop_lb: `0`, loop_ub: `26`, loop_st: `1`, lchunk: chunk);
199
200	// guided,1
201	omp_set_schedule(omp_sched_guided,`1`);
202	#pragma omp parallel// num_threads(num_th)
203	run_loop(loop_lb: `0`, loop_ub: `26`, loop_st: `1`, lchunk: chunk);
204
205	// dynamic,0 - use default chunk size 1
206	omp_set_schedule(omp_sched_dynamic,`0`);
207	#pragma omp parallel// num_threads(num_th)
208	run_loop(loop_lb: `0`, loop_ub: `26`, loop_st: `1`, lchunk: chunk);
209
210	// guided,0 - use default chunk size 1
211	omp_set_schedule(omp_sched_guided,`0`);
212	#pragma omp parallel// num_threads(num_th)
213	run_loop(loop_lb: `0`, loop_ub: `26`, loop_st: `1`, lchunk: chunk);
214
215	if (err) {
216	printf(format: "failed, err = %d\n", err);
217	return `1`;
218	} else {
219	printf(format: "passed\n");
220	return `0`;
221	}
222	}
223

source code of openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c