1 | // RUN: %libomp-compile && %libomp-run |
2 | // RUN: %libomp-run 1 && %libomp-run 2 |
3 | |
4 | // The test checks schedule(simd:runtime) |
5 | // in combination with OMP_SCHEDULE=static[,chunk] |
6 | #include <stdio.h> |
7 | #include <stdlib.h> |
8 | #include <string.h> |
9 | #include <omp.h> |
10 | |
// Portability shims for the two platform calls this test relies on.
//   delay()      - short pause: Sleep(1) on Windows, usleep(10) elsewhere.
//   seten(a,b,c) - set environment variable a to b. The Windows variant maps
//                  to _putenv_s and drops the third argument c; the POSIX
//                  variant forwards c to setenv.
// Both macros expand to a single expression with no trailing semicolon, so
// "delay();" is exactly one statement and stays valid inside if/else.
#if defined(WIN32) || defined(_WIN32)
#include <windows.h>
#define delay() Sleep(1)
#define seten(a,b,c) _putenv_s((a),(b))
#else
#include <unistd.h>
#define delay() usleep(10)
#define seten(a,b,c) setenv((a),(b),(c))
#endif
20 | |
// Passed by run_loop as the final argument of __kmpc_dispatch_init_4 and used
// there to scale the requested chunk (ch = lchunk * SIMD_LEN).
#define SIMD_LEN 4
// Number of failed checks. run_loop increments it each time it reports an
// error; main returns non-zero when it is not 0.
int err = 0;
23 | |
24 | // --------------------------------------------------------------------------- |
25 | // Various definitions copied from OpenMP RTL. |
// Schedule kinds understood by the __kmpc_dispatch_init_* entry points.
// Only the first value is spelled out; the rest follow consecutively.
enum sched {
  kmp_sch_static_balanced_chunked = 45,
  kmp_sch_guided_simd,  // 46
  kmp_sch_runtime_simd  // 47
};
// Short integer aliases used by the copied runtime declarations.
// NOTE(review): the names suggest fixed widths, but the underlying types are
// plain C integer types, so the widths are whatever the platform provides.
typedef unsigned int u32;
typedef signed long long int i64;
typedef unsigned long long int u64;
// Descriptor handed as the first argument to every __kmpc_* entry point
// declared in this file (presumably the runtime's ident_t; confirm against
// the runtime headers before changing the layout).
typedef struct {
  int reserved_1;
  int flags;
  int reserved_2;
  int reserved_3;
  // Points at a string literal in this file (";file;func;0;0;;"), so it is
  // const-qualified: the test never writes through it.
  const char *psource;
} id;
41 | |
42 | #ifdef __cplusplus |
43 | extern "C" { |
44 | #endif |
45 | int __kmpc_global_thread_num(id*); |
46 | void __kmpc_barrier(id*, int gtid); |
47 | void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int); |
48 | void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64); |
49 | int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*); |
50 | int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*); |
51 | #ifdef __cplusplus |
52 | } // extern "C" |
53 | #endif |
54 | // End of definitions copied from OpenMP RTL. |
55 | // --------------------------------------------------------------------------- |
56 | static id loc = {.reserved_1: 0, .flags: 2, .reserved_2: 0, .reserved_3: 0, .psource: ";file;func;0;0;;" }; |
57 | |
58 | // --------------------------------------------------------------------------- |
59 | void |
60 | run_loop( |
61 | int loop_lb, // Loop lower bound. |
62 | int loop_ub, // Loop upper bound. |
63 | int loop_st, // Loop stride. |
64 | int lchunk |
65 | ) { |
66 | static int volatile loop_sync = 0; |
67 | int lb; // Chunk lower bound. |
68 | int ub; // Chunk upper bound. |
69 | int st; // Chunk stride. |
70 | int rc; |
71 | int nthreads = omp_get_num_threads(); |
72 | int tid = omp_get_thread_num(); |
73 | int gtid = __kmpc_global_thread_num(&loc); |
74 | int last; |
75 | int tc = (loop_ub - loop_lb) / loop_st + 1; |
76 | int ch; |
77 | int no_chunk = 0; |
78 | if (lchunk == 0) { |
79 | no_chunk = 1; |
80 | lchunk = 1; |
81 | } |
82 | ch = lchunk * SIMD_LEN; |
83 | #if _DEBUG > 1 |
84 | printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n" , |
85 | gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk); |
86 | #endif |
87 | // Don't test degenerate cases that should have been discovered by codegen. |
88 | if (loop_st == 0) |
89 | return; |
90 | if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub) |
91 | return; |
92 | __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd, |
93 | loop_lb, loop_ub, loop_st, SIMD_LEN); |
94 | { |
95 | // Let the master thread handle the chunks alone. |
96 | int chunk; // No of current chunk. |
97 | int last_ub; // Upper bound of the last processed chunk. |
98 | u64 cur; // Number of interations in current chunk. |
99 | u64 max; // Max allowed iterations for current chunk. |
100 | int undersized = 0; |
101 | last_ub = loop_ub; |
102 | chunk = 0; |
103 | max = (loop_ub - loop_lb) / loop_st + 1; |
104 | // The first chunk can consume all iterations. |
105 | while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) { |
106 | ++ chunk; |
107 | #if _DEBUG |
108 | printf(format: "th %d: chunk=%d, lb=%d, ub=%d ch %d\n" , |
109 | tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1)); |
110 | #endif |
111 | // Check if previous chunk (it is not the final chunk) is undersized. |
112 | if (undersized) |
113 | printf(format: "Error with chunk %d, th %d, err %d\n" , chunk, tid, ++err); |
114 | if (loop_st > 0) { |
115 | if (!(ub <= loop_ub)) |
116 | printf(format: "Error with ub %d, %d, ch %d, err %d\n" , |
117 | (int)ub, (int)loop_ub, chunk, ++err); |
118 | if (!(lb <= ub)) |
119 | printf(format: "Error with bounds %d, %d, %d, err %d\n" , |
120 | (int)lb, (int)ub, chunk, ++err); |
121 | } else { |
122 | if (!(ub >= loop_ub)) |
123 | printf(format: "Error with ub %d, %d, %d, err %d\n" , |
124 | (int)ub, (int)loop_ub, chunk, ++err); |
125 | if (!(lb >= ub)) |
126 | printf(format: "Error with bounds %d, %d, %d, err %d\n" , |
127 | (int)lb, (int)ub, chunk, ++err); |
128 | }; // if |
129 | // Stride should not change. |
130 | if (!(st == loop_st)) |
131 | printf(format: "Error with st %d, %d, ch %d, err %d\n" , |
132 | (int)st, (int)loop_st, chunk, ++err); |
133 | cur = ( ub - lb ) / loop_st + 1; |
134 | // Guided scheduling uses FP computations, so current chunk may |
135 | // be a bit bigger (+1) than allowed maximum. |
136 | if (!( cur <= max + 1)) |
137 | printf(format: "Error with iter %llu, %llu, err %d\n" , cur, max, ++err); |
138 | // Update maximum for the next chunk. |
139 | if (last) { |
140 | if (!no_chunk && cur > ch && nthreads > 1) |
141 | printf(format: "Error: too big last chunk %d (%d), tid %d, err %d\n" , |
142 | (int)cur, ch, tid, ++err); |
143 | } else { |
144 | if (cur % ch) |
145 | printf(format: "Error with chunk %d, %d, ch %d, tid %d, err %d\n" , |
146 | chunk, (int)cur, ch, tid, ++err); |
147 | } |
148 | if (cur < max) |
149 | max = cur; |
150 | last_ub = ub; |
151 | undersized = (cur < ch); |
152 | #if _DEBUG > 1 |
153 | if (last) |
154 | printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n" , |
155 | undersized,cur,ch,tid,ub,lb,loop_st); |
156 | #endif |
157 | } // while |
158 | // Must have the right last iteration index. |
159 | if (loop_st > 0) { |
160 | if (!(last_ub <= loop_ub)) |
161 | printf(format: "Error with last1 %d, %d, ch %d, err %d\n" , |
162 | (int)last_ub, (int)loop_ub, chunk, ++err); |
163 | if (last && !(last_ub + loop_st > loop_ub)) |
164 | printf(format: "Error with last2 %d, %d, %d, ch %d, err %d\n" , |
165 | (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); |
166 | } else { |
167 | if (!(last_ub >= loop_ub)) |
168 | printf(format: "Error with last1 %d, %d, ch %d, err %d\n" , |
169 | (int)last_ub, (int)loop_ub, chunk, ++err); |
170 | if (last && !(last_ub + loop_st < loop_ub)) |
171 | printf(format: "Error with last2 %d, %d, %d, ch %d, err %d\n" , |
172 | (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); |
173 | } // if |
174 | } |
175 | __kmpc_barrier(&loc, gtid); |
176 | } // run_loop |
177 | |
178 | int main(int argc, char *argv[]) |
179 | { |
180 | int chunk = 0; |
181 | if (argc > 1) { |
182 | char *buf = malloc(size: 8 + strlen(s: argv[1])); |
183 | // expect chunk size as a parameter |
184 | chunk = atoi(nptr: argv[1]); |
185 | strcpy(dest: buf,src: "static," ); |
186 | strcat(dest: buf,src: argv[1]); |
187 | seten("OMP_SCHEDULE" ,buf,1); |
188 | printf(format: "Testing schedule(simd:%s)\n" , buf); |
189 | free(ptr: buf); |
190 | } else { |
191 | seten("OMP_SCHEDULE" ,"static" ,1); |
192 | printf(format: "Testing schedule(simd:static)\n" ); |
193 | } |
194 | #pragma omp parallel// num_threads(num_th) |
195 | run_loop(loop_lb: 0, loop_ub: 26, loop_st: 1, lchunk: chunk); |
196 | if (err) { |
197 | printf(format: "failed, err = %d\n" , err); |
198 | return 1; |
199 | } else { |
200 | printf(format: "passed\n" ); |
201 | return 0; |
202 | } |
203 | } |
204 | |