1 | /* |
2 | * kmp_sched.cpp -- static scheduling -- iteration initialization |
3 | */ |
4 | |
5 | //===----------------------------------------------------------------------===// |
6 | // |
7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
8 | // See https://llvm.org/LICENSE.txt for license information. |
9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | /* Static scheduling initialization. |
14 | |
15 | NOTE: team->t.t_nproc is a constant inside of any dispatch loop; however, |
16 | it may change values between parallel regions. __kmp_max_nth |
17 | is the largest value __kmp_nth may take, 1 is the smallest. */ |
18 | |
19 | #include "kmp.h" |
20 | #include "kmp_error.h" |
21 | #include "kmp_i18n.h" |
22 | #include "kmp_itt.h" |
23 | #include "kmp_stats.h" |
24 | #include "kmp_str.h" |
25 | |
26 | #if OMPT_SUPPORT |
27 | #include "ompt-specific.h" |
28 | #endif |
29 | |
30 | #ifdef KMP_DEBUG |
31 | //------------------------------------------------------------------------- |
32 | // template for debug prints specification ( d, u, lld, llu ) |
33 | char const *traits_t<int>::spec = "d"; |
34 | char const *traits_t<unsigned int>::spec = "u"; |
35 | char const *traits_t<long long>::spec = "lld"; |
36 | char const *traits_t<unsigned long long>::spec = "llu"; |
37 | char const *traits_t<long>::spec = "ld"; |
38 | //------------------------------------------------------------------------- |
39 | #endif |
40 | |
41 | #if KMP_STATS_ENABLED |
42 | #define KMP_STATS_LOOP_END(stat) \ |
43 | { \ |
44 | kmp_int64 t; \ |
45 | kmp_int64 u = (kmp_int64)(*pupper); \ |
46 | kmp_int64 l = (kmp_int64)(*plower); \ |
47 | kmp_int64 i = (kmp_int64)incr; \ |
48 | if (i == 1) { \ |
49 | t = u - l + 1; \ |
50 | } else if (i == -1) { \ |
51 | t = l - u + 1; \ |
52 | } else if (i > 0) { \ |
53 | t = (u - l) / i + 1; \ |
54 | } else { \ |
55 | KMP_DEBUG_ASSERT(i != 0); \ |
56 | t = (l - u) / (-i) + 1; \ |
57 | } \ |
58 | KMP_COUNT_VALUE(stat, t); \ |
59 | KMP_POP_PARTITIONED_TIMER(); \ |
60 | } |
61 | #else |
62 | #define KMP_STATS_LOOP_END(stat) /* Nothing */ |
63 | #endif |
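// For example, the trip-count arithmetic above gives: *plower = 0, *pupper = 9,
// incr = 2 -> t = (9 - 0) / 2 + 1 = 5 (iterations 0, 2, 4, 6, 8), and
// *plower = 10, *pupper = 1, incr = -3 -> t = (10 - 1) / 3 + 1 = 4
// (iterations 10, 7, 4, 1).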
64 | |
65 | #if USE_ITT_BUILD || defined KMP_DEBUG |
66 | static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"}; |
67 | static inline void check_loc(ident_t *&loc) { |
68 | if (loc == NULL) |
69 | loc = &loc_stub; // may need to report location info to ittnotify |
70 | } |
71 | #endif |
72 | |
73 | template <typename T> |
74 | static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid, |
75 | kmp_int32 schedtype, kmp_int32 *plastiter, |
76 | T *plower, T *pupper, |
77 | typename traits_t<T>::signed_t *pstride, |
78 | typename traits_t<T>::signed_t incr, |
79 | typename traits_t<T>::signed_t chunk |
80 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
81 | , |
82 | void *codeptr |
83 | #endif |
84 | ) { |
85 | KMP_COUNT_BLOCK(OMP_LOOP_STATIC); |
86 | KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static); |
87 | KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling); |
88 | |
89 | // Clear monotonic/nonmonotonic bits (ignore them) |
90 | schedtype = SCHEDULE_WITHOUT_MODIFIERS(schedtype); |
91 | |
92 | typedef typename traits_t<T>::unsigned_t UT; |
93 | typedef typename traits_t<T>::signed_t ST; |
94 | /* this all has to be changed back to TID and such.. */ |
95 | kmp_int32 gtid = global_tid; |
96 | kmp_uint32 tid; |
97 | kmp_uint32 nth; |
98 | UT trip_count; |
99 | kmp_team_t *team; |
100 | __kmp_assert_valid_gtid(gtid); |
101 | kmp_info_t *th = __kmp_threads[gtid]; |
102 | |
103 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
104 | ompt_team_info_t *team_info = NULL; |
105 | ompt_task_info_t *task_info = NULL; |
106 | ompt_work_t ompt_work_type = ompt_work_loop; |
107 | |
108 | static kmp_int8 warn = 0; |
109 | |
110 | if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) { |
111 | // Only fully initialize variables needed by OMPT if OMPT is enabled. |
112 | team_info = __ompt_get_teaminfo(0, NULL); |
113 | task_info = __ompt_get_task_info_object(0); |
114 | // Determine workshare type |
115 | if (loc != NULL) { |
116 | if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { |
117 | ompt_work_type = ompt_work_loop; |
118 | } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { |
119 | ompt_work_type = ompt_work_sections; |
120 | } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { |
121 | ompt_work_type = ompt_work_distribute; |
122 | } else { |
123 | kmp_int8 bool_res = |
124 | KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1); |
125 | if (bool_res) |
126 | KMP_WARNING(OmptOutdatedWorkshare); |
127 | } |
128 | KMP_DEBUG_ASSERT(ompt_work_type); |
129 | } |
130 | } |
131 | #endif |
132 | |
133 | KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride); |
134 | KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid)); |
135 | #ifdef KMP_DEBUG |
136 | { |
137 | char *buff; |
138 | // create format specifiers before the debug output |
139 | buff = __kmp_str_format( |
140 | "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," |
141 | " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n" , |
142 | traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec, |
143 | traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec); |
144 | KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper, |
145 | *pstride, incr, chunk)); |
146 | __kmp_str_free(&buff); |
147 | } |
148 | #endif |
149 | |
150 | if (__kmp_env_consistency_check) { |
151 | __kmp_push_workshare(global_tid, ct_pdo, loc); |
152 | if (incr == 0) { |
153 | __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, |
154 | loc); |
155 | } |
156 | } |
157 | /* special handling for zero-trip loops */ |
158 | if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { |
159 | if (plastiter != NULL) |
160 | *plastiter = FALSE; |
161 | /* leave pupper and plower set to entire iteration space */ |
162 | *pstride = incr; /* value should never be used */ |
163 | // *plower = *pupper - incr; |
164 | // let compiler bypass the illegal loop (like for(i=1;i<10;i--)) |
165 | // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE |
166 | // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009. |
167 | #ifdef KMP_DEBUG |
168 | { |
169 | char *buff; |
170 | // create format specifiers before the debug output |
171 | buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d " |
172 | "lower=%%%s upper=%%%s stride = %%%s " |
173 | "signed?<%s>, loc = %%s\n" , |
174 | traits_t<T>::spec, traits_t<T>::spec, |
175 | traits_t<ST>::spec, traits_t<T>::spec); |
176 | check_loc(loc); |
177 | KD_TRACE(100, |
178 | (buff, *plastiter, *plower, *pupper, *pstride, loc->psource)); |
179 | __kmp_str_free(&buff); |
180 | } |
181 | #endif |
182 | KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); |
183 | |
184 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
185 | if (ompt_enabled.ompt_callback_work) { |
186 | ompt_callbacks.ompt_callback(ompt_callback_work)( |
187 | ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), |
188 | &(task_info->task_data), 0, codeptr); |
189 | } |
190 | #endif |
191 | KMP_STATS_LOOP_END(OMP_loop_static_iterations); |
192 | return; |
193 | } |
194 | |
195 | // Although there are schedule enumerations above kmp_ord_upper which are not |
196 | // schedules for "distribute", the only ones which are useful are dynamic, so |
197 | // they cannot be seen here, since this codepath is only executed for static |
198 | // schedules. |
199 | if (schedtype > kmp_ord_upper) { |
200 | // we are in DISTRIBUTE construct |
201 | schedtype += kmp_sch_static - |
202 | kmp_distribute_static; // AC: convert to usual schedule type |
203 | if (th->th.th_team->t.t_serialized > 1) { |
204 | tid = 0; |
205 | team = th->th.th_team; |
206 | } else { |
207 | tid = th->th.th_team->t.t_master_tid; |
208 | team = th->th.th_team->t.t_parent; |
209 | } |
210 | } else { |
211 | tid = __kmp_tid_from_gtid(global_tid); |
212 | team = th->th.th_team; |
213 | } |
214 | |
215 | /* determine if "for" loop is an active worksharing construct */ |
216 | if (team->t.t_serialized) { |
217 | /* serialized parallel, each thread executes whole iteration space */ |
218 | if (plastiter != NULL) |
219 | *plastiter = TRUE; |
220 | /* leave pupper and plower set to entire iteration space */ |
221 | *pstride = |
222 | (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); |
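// For example, *plower = 0, *pupper = 99, incr = 1 yields *pstride = 100, so
// the single (serialized) thread owns the entire iteration space.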
223 | |
224 | #ifdef KMP_DEBUG |
225 | { |
226 | char *buff; |
227 | // create format specifiers before the debug output |
228 | buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d " |
229 | "lower=%%%s upper=%%%s stride = %%%s\n" , |
230 | traits_t<T>::spec, traits_t<T>::spec, |
231 | traits_t<ST>::spec); |
232 | KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride)); |
233 | __kmp_str_free(&buff); |
234 | } |
235 | #endif |
236 | KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); |
237 | |
238 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
239 | if (ompt_enabled.ompt_callback_work) { |
240 | ompt_callbacks.ompt_callback(ompt_callback_work)( |
241 | ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), |
242 | &(task_info->task_data), *pstride, codeptr); |
243 | } |
244 | #endif |
245 | KMP_STATS_LOOP_END(OMP_loop_static_iterations); |
246 | return; |
247 | } |
248 | nth = team->t.t_nproc; |
249 | if (nth == 1) { |
250 | if (plastiter != NULL) |
251 | *plastiter = TRUE; |
252 | *pstride = |
253 | (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); |
254 | #ifdef KMP_DEBUG |
255 | { |
256 | char *buff; |
257 | // create format specifiers before the debug output |
258 | buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d " |
259 | "lower=%%%s upper=%%%s stride = %%%s\n" , |
260 | traits_t<T>::spec, traits_t<T>::spec, |
261 | traits_t<ST>::spec); |
262 | KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride)); |
263 | __kmp_str_free(&buff); |
264 | } |
265 | #endif |
266 | KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); |
267 | |
268 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
269 | if (ompt_enabled.ompt_callback_work) { |
270 | ompt_callbacks.ompt_callback(ompt_callback_work)( |
271 | ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), |
272 | &(task_info->task_data), *pstride, codeptr); |
273 | } |
274 | #endif |
275 | KMP_STATS_LOOP_END(OMP_loop_static_iterations); |
276 | return; |
277 | } |
278 | |
279 | /* compute trip count */ |
280 | if (incr == 1) { |
281 | trip_count = *pupper - *plower + 1; |
282 | } else if (incr == -1) { |
283 | trip_count = *plower - *pupper + 1; |
284 | } else if (incr > 0) { |
285 | // upper-lower can exceed the limit of signed type |
286 | trip_count = (UT)(*pupper - *plower) / incr + 1; |
287 | } else { |
288 | KMP_DEBUG_ASSERT(incr != 0); |
289 | trip_count = (UT)(*plower - *pupper) / (-incr) + 1; |
290 | } |
291 | |
292 | #if KMP_STATS_ENABLED |
293 | if (KMP_MASTER_GTID(gtid)) { |
294 | KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count); |
295 | } |
296 | #endif |
297 | |
298 | if (__kmp_env_consistency_check) { |
299 | /* tripcount overflow? */ |
300 | if (trip_count == 0 && *pupper != *plower) { |
301 | __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, |
302 | loc); |
303 | } |
304 | } |
305 | |
306 | /* compute remaining parameters */ |
307 | switch (schedtype) { |
308 | case kmp_sch_static: { |
309 | if (trip_count < nth) { |
310 | KMP_DEBUG_ASSERT( |
311 | __kmp_static == kmp_sch_static_greedy || |
312 | __kmp_static == |
313 | kmp_sch_static_balanced); // Unknown static scheduling type. |
314 | if (tid < trip_count) { |
315 | *pupper = *plower = *plower + tid * incr; |
316 | } else { |
317 | // set bounds so non-active threads execute no iterations |
318 | *plower = *pupper + (incr > 0 ? 1 : -1); |
319 | } |
320 | if (plastiter != NULL) |
321 | *plastiter = (tid == trip_count - 1); |
322 | } else { |
323 | KMP_DEBUG_ASSERT(nth != 0); |
324 | if (__kmp_static == kmp_sch_static_balanced) { |
325 | UT small_chunk = trip_count / nth; |
326 | UT extras = trip_count % nth; |
327 | *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras)); |
328 | *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr); |
329 | if (plastiter != NULL) |
330 | *plastiter = (tid == nth - 1); |
331 | } else { |
332 | T big_chunk_inc_count = |
333 | (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr; |
334 | T old_upper = *pupper; |
335 | |
336 | KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); |
337 | // Unknown static scheduling type. |
338 | |
339 | *plower += tid * big_chunk_inc_count; |
340 | *pupper = *plower + big_chunk_inc_count - incr; |
341 | if (incr > 0) { |
342 | if (*pupper < *plower) |
343 | *pupper = traits_t<T>::max_value; |
344 | if (plastiter != NULL) |
345 | *plastiter = *plower <= old_upper && *pupper > old_upper - incr; |
346 | if (*pupper > old_upper) |
347 | *pupper = old_upper; // tracker C73258 |
348 | } else { |
349 | if (*pupper > *plower) |
350 | *pupper = traits_t<T>::min_value; |
351 | if (plastiter != NULL) |
352 | *plastiter = *plower >= old_upper && *pupper < old_upper - incr; |
353 | if (*pupper < old_upper) |
354 | *pupper = old_upper; // tracker C73258 |
355 | } |
356 | } |
357 | } |
358 | *pstride = trip_count; |
359 | break; |
360 | } |
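// A worked example of the kmp_sch_static partitioning above, for
// trip_count = 10 and nth = 4: kmp_sch_static_balanced computes
// small_chunk = 2 and extras = 2, so the threads get 3, 3, 2 and 2 iterations;
// kmp_sch_static_greedy computes big_chunk_inc_count = ceil(10 / 4) * incr, so
// the threads get 3, 3, 3 and 1 iterations (the last thread's upper bound is
// clipped back to old_upper).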
361 | case kmp_sch_static_chunked: { |
362 | ST span; |
363 | UT nchunks; |
364 | KMP_DEBUG_ASSERT(chunk != 0); |
365 | if (chunk < 1) |
366 | chunk = 1; |
367 | else if ((UT)chunk > trip_count) |
368 | chunk = trip_count; |
369 | nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0); |
370 | span = chunk * incr; |
371 | if (nchunks < nth) { |
372 | *pstride = span * nchunks; |
373 | if (tid < nchunks) { |
374 | *plower = *plower + (span * tid); |
375 | *pupper = *plower + span - incr; |
376 | } else { |
377 | *plower = *pupper + (incr > 0 ? 1 : -1); |
378 | } |
379 | } else { |
380 | *pstride = span * nth; |
381 | *plower = *plower + (span * tid); |
382 | *pupper = *plower + span - incr; |
383 | } |
384 | if (plastiter != NULL) |
385 | *plastiter = (tid == (nchunks - 1) % nth); |
386 | break; |
387 | } |
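// A worked example of kmp_sch_static_chunked above, for trip_count = 10,
// chunk = 2, incr = 1 and nth = 4: nchunks = 5, span = 2 and *pstride = 8;
// thread 0 starts with iterations 0-1 (and later takes 8-9 by advancing by the
// stride), threads 1-3 start with 2-3, 4-5 and 6-7, and thread
// (nchunks - 1) % nth = 0 reports the last iteration.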
388 | case kmp_sch_static_balanced_chunked: { |
389 | T old_upper = *pupper; |
390 | KMP_DEBUG_ASSERT(nth != 0); |
391 | // round up to make sure the chunk is enough to cover all iterations |
392 | UT span = (trip_count + nth - 1) / nth; |
393 | |
394 | // perform chunk adjustment |
395 | chunk = (span + chunk - 1) & ~(chunk - 1); |
396 | |
397 | span = chunk * incr; |
398 | *plower = *plower + (span * tid); |
399 | *pupper = *plower + span - incr; |
400 | if (incr > 0) { |
401 | if (*pupper > old_upper) |
402 | *pupper = old_upper; |
403 | } else if (*pupper < old_upper) |
404 | *pupper = old_upper; |
405 | |
406 | if (plastiter != NULL) { |
407 | KMP_DEBUG_ASSERT(chunk != 0); |
408 | *plastiter = (tid == ((trip_count - 1) / (UT)chunk)); |
409 | } |
410 | break; |
411 | } |
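// A worked example of kmp_sch_static_balanced_chunked above, for
// trip_count = 100, nth = 8, incr = 1 and chunk = 16: the per-thread share
// ceil(100 / 8) = 13 is rounded up to 16 (the bit-mask adjustment assumes a
// power-of-two chunk), so threads 0-5 get 16 iterations each, thread 6 gets
// the remaining 4 after its upper bound is clipped, thread 7 gets none, and
// thread (trip_count - 1) / chunk = 6 reports the last iteration.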
412 | default: |
413 | KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type"); |
414 | break; |
415 | } |
416 | |
417 | #if USE_ITT_BUILD |
418 | // Report loop metadata |
419 | if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && |
420 | __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL && |
421 | team->t.t_active_level == 1) { |
422 | kmp_uint64 cur_chunk = chunk; |
423 | check_loc(loc); |
424 | // Calculate chunk in case it was not specified; it is specified for |
425 | // kmp_sch_static_chunked |
426 | if (schedtype == kmp_sch_static) { |
427 | KMP_DEBUG_ASSERT(nth != 0); |
428 | cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0); |
429 | } |
430 | // 0 - "static" schedule |
431 | __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk); |
432 | } |
433 | #endif |
434 | #ifdef KMP_DEBUG |
435 | { |
436 | char *buff; |
437 | // create format specifiers before the debug output |
438 | buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s " |
439 | "upper=%%%s stride = %%%s signed?<%s>\n" , |
440 | traits_t<T>::spec, traits_t<T>::spec, |
441 | traits_t<ST>::spec, traits_t<T>::spec); |
442 | KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride)); |
443 | __kmp_str_free(&buff); |
444 | } |
445 | #endif |
446 | KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); |
447 | |
448 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
449 | if (ompt_enabled.ompt_callback_work) { |
450 | ompt_callbacks.ompt_callback(ompt_callback_work)( |
451 | ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), |
452 | &(task_info->task_data), trip_count, codeptr); |
453 | } |
454 | if (ompt_enabled.ompt_callback_dispatch) { |
455 | ompt_dispatch_t dispatch_type; |
456 | ompt_data_t instance = ompt_data_none; |
457 | ompt_dispatch_chunk_t dispatch_chunk; |
458 | if (ompt_work_type == ompt_work_sections) { |
459 | dispatch_type = ompt_dispatch_section; |
460 | instance.ptr = codeptr; |
461 | } else { |
462 | OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr); |
463 | dispatch_type = (ompt_work_type == ompt_work_distribute) |
464 | ? ompt_dispatch_distribute_chunk |
465 | : ompt_dispatch_ws_loop_chunk; |
466 | instance.ptr = &dispatch_chunk; |
467 | } |
468 | ompt_callbacks.ompt_callback(ompt_callback_dispatch)( |
469 | &(team_info->parallel_data), &(task_info->task_data), dispatch_type, |
470 | instance); |
471 | } |
472 | #endif |
473 | |
474 | KMP_STATS_LOOP_END(OMP_loop_static_iterations); |
475 | return; |
476 | } |
477 | |
478 | template <typename T> |
479 | static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid, |
480 | kmp_int32 schedule, kmp_int32 *plastiter, |
481 | T *plower, T *pupper, T *pupperDist, |
482 | typename traits_t<T>::signed_t *pstride, |
483 | typename traits_t<T>::signed_t incr, |
484 | typename traits_t<T>::signed_t chunk |
485 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
486 | , |
487 | void *codeptr |
488 | #endif |
489 | ) { |
490 | KMP_COUNT_BLOCK(OMP_DISTRIBUTE); |
491 | KMP_PUSH_PARTITIONED_TIMER(OMP_distribute); |
492 | KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling); |
493 | typedef typename traits_t<T>::unsigned_t UT; |
494 | typedef typename traits_t<T>::signed_t ST; |
495 | kmp_uint32 tid; |
496 | kmp_uint32 nth; |
497 | kmp_uint32 team_id; |
498 | kmp_uint32 nteams; |
499 | UT trip_count; |
500 | kmp_team_t *team; |
501 | kmp_info_t *th; |
502 | |
503 | KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride); |
504 | KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid)); |
505 | __kmp_assert_valid_gtid(gtid); |
506 | #ifdef KMP_DEBUG |
507 | { |
508 | char *buff; |
509 | // create format specifiers before the debug output |
510 | buff = __kmp_str_format( |
511 | "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d " |
512 | "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n" , |
513 | traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec, |
514 | traits_t<ST>::spec, traits_t<T>::spec); |
515 | KD_TRACE(100, |
516 | (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk)); |
517 | __kmp_str_free(&buff); |
518 | } |
519 | #endif |
520 | |
521 | if (__kmp_env_consistency_check) { |
522 | __kmp_push_workshare(gtid, ct_pdo, loc); |
523 | if (incr == 0) { |
524 | __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, |
525 | loc); |
526 | } |
527 | if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { |
528 | // The loop is illegal. |
529 | // Some zero-trip loops maintained by compiler, e.g.: |
530 | // for(i=10;i<0;++i) // lower >= upper - run-time check |
531 | // for(i=0;i>10;--i) // lower <= upper - run-time check |
532 | // for(i=0;i>10;++i) // incr > 0 - compile-time check |
533 | // for(i=10;i<0;--i) // incr < 0 - compile-time check |
534 | // Compiler does not check the following illegal loops: |
535 | // for(i=0;i<10;i+=incr) // where incr<0 |
536 | // for(i=10;i>0;i-=incr) // where incr<0 |
537 | __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); |
538 | } |
539 | } |
540 | tid = __kmp_tid_from_gtid(gtid); |
541 | th = __kmp_threads[gtid]; |
542 | nth = th->th.th_team_nproc; |
543 | team = th->th.th_team; |
544 | KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct |
545 | nteams = th->th.th_teams_size.nteams; |
546 | team_id = team->t.t_master_tid; |
547 | KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc); |
548 | |
549 | // compute global trip count |
550 | if (incr == 1) { |
551 | trip_count = *pupper - *plower + 1; |
552 | } else if (incr == -1) { |
553 | trip_count = *plower - *pupper + 1; |
554 | } else if (incr > 0) { |
555 | // upper-lower can exceed the limit of signed type |
556 | trip_count = (UT)(*pupper - *plower) / incr + 1; |
557 | } else { |
558 | KMP_DEBUG_ASSERT(incr != 0); |
559 | trip_count = (UT)(*plower - *pupper) / (-incr) + 1; |
560 | } |
561 | |
562 | *pstride = *pupper - *plower; // just in case (can be unused) |
563 | if (trip_count <= nteams) { |
564 | KMP_DEBUG_ASSERT( |
565 | __kmp_static == kmp_sch_static_greedy || |
566 | __kmp_static == |
567 | kmp_sch_static_balanced); // Unknown static scheduling type. |
568 | // only primary threads of some teams get a single iteration, other threads |
569 | // get nothing |
570 | if (team_id < trip_count && tid == 0) { |
571 | *pupper = *pupperDist = *plower = *plower + team_id * incr; |
572 | } else { |
573 | *pupperDist = *pupper; |
574 | *plower = *pupper + incr; // compiler should skip loop body |
575 | } |
576 | if (plastiter != NULL) |
577 | *plastiter = (tid == 0 && team_id == trip_count - 1); |
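// For example, with trip_count = 3 and nteams = 8 only the primary threads of
// teams 0, 1 and 2 get one iteration each, every other thread gets an empty
// range, and team 2's primary thread reports the last iteration.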
578 | } else { |
579 | // Get the team's chunk first (each team gets at most one chunk) |
580 | KMP_DEBUG_ASSERT(nteams != 0); |
581 | if (__kmp_static == kmp_sch_static_balanced) { |
582 | UT chunkD = trip_count / nteams; |
583 | UT extras = trip_count % nteams; |
584 | *plower += |
585 | incr * (team_id * chunkD + (team_id < extras ? team_id : extras)); |
586 | *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr); |
587 | if (plastiter != NULL) |
588 | *plastiter = (team_id == nteams - 1); |
589 | } else { |
590 | T chunk_inc_count = |
591 | (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr; |
592 | T upper = *pupper; |
593 | KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); |
594 | // Unknown static scheduling type. |
595 | *plower += team_id * chunk_inc_count; |
596 | *pupperDist = *plower + chunk_inc_count - incr; |
597 | // Check/correct bounds if needed |
598 | if (incr > 0) { |
599 | if (*pupperDist < *plower) |
600 | *pupperDist = traits_t<T>::max_value; |
601 | if (plastiter != NULL) |
602 | *plastiter = *plower <= upper && *pupperDist > upper - incr; |
603 | if (*pupperDist > upper) |
604 | *pupperDist = upper; // tracker C73258 |
605 | if (*plower > *pupperDist) { |
606 | *pupper = *pupperDist; // no iterations available for the team |
607 | goto end; |
608 | } |
609 | } else { |
610 | if (*pupperDist > *plower) |
611 | *pupperDist = traits_t<T>::min_value; |
612 | if (plastiter != NULL) |
613 | *plastiter = *plower >= upper && *pupperDist < upper - incr; |
614 | if (*pupperDist < upper) |
615 | *pupperDist = upper; // tracker C73258 |
616 | if (*plower < *pupperDist) { |
617 | *pupper = *pupperDist; // no iterations available for the team |
618 | goto end; |
619 | } |
620 | } |
621 | } |
622 | // Get the parallel loop chunk now (for thread) |
623 | // compute trip count for team's chunk |
624 | if (incr == 1) { |
625 | trip_count = *pupperDist - *plower + 1; |
626 | } else if (incr == -1) { |
627 | trip_count = *plower - *pupperDist + 1; |
628 | } else if (incr > 1) { |
629 | // upper-lower can exceed the limit of signed type |
630 | trip_count = (UT)(*pupperDist - *plower) / incr + 1; |
631 | } else { |
632 | KMP_DEBUG_ASSERT(incr != 0); |
633 | trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1; |
634 | } |
635 | KMP_DEBUG_ASSERT(trip_count); |
636 | switch (schedule) { |
637 | case kmp_sch_static: { |
638 | if (trip_count <= nth) { |
639 | KMP_DEBUG_ASSERT( |
640 | __kmp_static == kmp_sch_static_greedy || |
641 | __kmp_static == |
642 | kmp_sch_static_balanced); // Unknown static scheduling type. |
643 | if (tid < trip_count) |
644 | *pupper = *plower = *plower + tid * incr; |
645 | else |
646 | *plower = *pupper + incr; // no iterations available |
647 | if (plastiter != NULL) |
648 | if (*plastiter != 0 && !(tid == trip_count - 1)) |
649 | *plastiter = 0; |
650 | } else { |
651 | KMP_DEBUG_ASSERT(nth != 0); |
652 | if (__kmp_static == kmp_sch_static_balanced) { |
653 | UT chunkL = trip_count / nth; |
654 | UT extras = trip_count % nth; |
655 | *plower += incr * (tid * chunkL + (tid < extras ? tid : extras)); |
656 | *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr); |
657 | if (plastiter != NULL) |
658 | if (*plastiter != 0 && !(tid == nth - 1)) |
659 | *plastiter = 0; |
660 | } else { |
661 | T chunk_inc_count = |
662 | (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr; |
663 | T upper = *pupperDist; |
664 | KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); |
665 | // Unknown static scheduling type. |
666 | *plower += tid * chunk_inc_count; |
667 | *pupper = *plower + chunk_inc_count - incr; |
668 | if (incr > 0) { |
669 | if (*pupper < *plower) |
670 | *pupper = traits_t<T>::max_value; |
671 | if (plastiter != NULL) |
672 | if (*plastiter != 0 && |
673 | !(*plower <= upper && *pupper > upper - incr)) |
674 | *plastiter = 0; |
675 | if (*pupper > upper) |
676 | *pupper = upper; // tracker C73258 |
677 | } else { |
678 | if (*pupper > *plower) |
679 | *pupper = traits_t<T>::min_value; |
680 | if (plastiter != NULL) |
681 | if (*plastiter != 0 && |
682 | !(*plower >= upper && *pupper < upper - incr)) |
683 | *plastiter = 0; |
684 | if (*pupper < upper) |
685 | *pupper = upper; // tracker C73258 |
686 | } |
687 | } |
688 | } |
689 | break; |
690 | } |
691 | case kmp_sch_static_chunked: { |
692 | ST span; |
693 | if (chunk < 1) |
694 | chunk = 1; |
695 | span = chunk * incr; |
696 | *pstride = span * nth; |
697 | *plower = *plower + (span * tid); |
698 | *pupper = *plower + span - incr; |
699 | if (plastiter != NULL) { |
700 | KMP_DEBUG_ASSERT(chunk != 0); |
701 | if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth)) |
702 | *plastiter = 0; |
703 | } |
704 | break; |
705 | } |
706 | default: |
707 | KMP_ASSERT2(0, |
708 | "__kmpc_dist_for_static_init: unknown loop scheduling type" ); |
709 | break; |
710 | } |
711 | } |
712 | end:; |
713 | #ifdef KMP_DEBUG |
714 | { |
715 | char *buff; |
716 | // create format specifiers before the debug output |
717 | buff = __kmp_str_format( |
718 | "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s " |
719 | "stride=%%%s signed?<%s>\n" , |
720 | traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec, |
721 | traits_t<ST>::spec, traits_t<T>::spec); |
722 | KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride)); |
723 | __kmp_str_free(&buff); |
724 | } |
725 | #endif |
726 | KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid)); |
727 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
728 | if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) { |
729 | ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); |
730 | ompt_task_info_t *task_info = __ompt_get_task_info_object(0); |
731 | if (ompt_enabled.ompt_callback_work) { |
732 | ompt_callbacks.ompt_callback(ompt_callback_work)( |
733 | ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data), |
734 | &(task_info->task_data), 0, codeptr); |
735 | } |
736 | if (ompt_enabled.ompt_callback_dispatch) { |
737 | ompt_data_t instance = ompt_data_none; |
738 | ompt_dispatch_chunk_t dispatch_chunk; |
739 | OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr); |
740 | instance.ptr = &dispatch_chunk; |
741 | ompt_callbacks.ompt_callback(ompt_callback_dispatch)( |
742 | &(team_info->parallel_data), &(task_info->task_data), |
743 | ompt_dispatch_distribute_chunk, instance); |
744 | } |
745 | } |
746 | #endif // OMPT_SUPPORT && OMPT_OPTIONAL |
747 | KMP_STATS_LOOP_END(OMP_distribute_iterations); |
748 | return; |
749 | } |
750 | |
751 | template <typename T> |
752 | static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid, |
753 | kmp_int32 *p_last, T *p_lb, T *p_ub, |
754 | typename traits_t<T>::signed_t *p_st, |
755 | typename traits_t<T>::signed_t incr, |
756 | typename traits_t<T>::signed_t chunk) { |
757 | // The routine returns the first chunk distributed to the team and the |
758 | // stride used to compute the next chunks. |
759 | // The last-iteration flag is set for the team that will execute |
760 | // the last iteration of the loop. |
761 | // The routine is called for dist_schedule(static,chunk) only. |
762 | typedef typename traits_t<T>::unsigned_t UT; |
763 | typedef typename traits_t<T>::signed_t ST; |
764 | kmp_uint32 team_id; |
765 | kmp_uint32 nteams; |
766 | UT trip_count; |
767 | T lower; |
768 | T upper; |
769 | ST span; |
770 | kmp_team_t *team; |
771 | kmp_info_t *th; |
772 | |
773 | KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st); |
774 | KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid)); |
775 | __kmp_assert_valid_gtid(gtid); |
776 | #ifdef KMP_DEBUG |
777 | { |
778 | char *buff; |
779 | // create format specifiers before the debug output |
780 | buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d " |
781 | "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n" , |
782 | traits_t<T>::spec, traits_t<T>::spec, |
783 | traits_t<ST>::spec, traits_t<ST>::spec, |
784 | traits_t<T>::spec); |
785 | KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk)); |
786 | __kmp_str_free(&buff); |
787 | } |
788 | #endif |
789 | |
790 | lower = *p_lb; |
791 | upper = *p_ub; |
792 | if (__kmp_env_consistency_check) { |
793 | if (incr == 0) { |
794 | __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, |
795 | loc); |
796 | } |
797 | if (incr > 0 ? (upper < lower) : (lower < upper)) { |
798 | // The loop is illegal. |
799 | // Some zero-trip loops maintained by compiler, e.g.: |
800 | // for(i=10;i<0;++i) // lower >= upper - run-time check |
801 | // for(i=0;i>10;--i) // lower <= upper - run-time check |
802 | // for(i=0;i>10;++i) // incr > 0 - compile-time check |
803 | // for(i=10;i<0;--i) // incr < 0 - compile-time check |
804 | // Compiler does not check the following illegal loops: |
805 | // for(i=0;i<10;i+=incr) // where incr<0 |
806 | // for(i=10;i>0;i-=incr) // where incr<0 |
807 | __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); |
808 | } |
809 | } |
810 | th = __kmp_threads[gtid]; |
811 | team = th->th.th_team; |
812 | KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct |
813 | nteams = th->th.th_teams_size.nteams; |
814 | team_id = team->t.t_master_tid; |
815 | KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc); |
816 | |
817 | // compute trip count |
818 | if (incr == 1) { |
819 | trip_count = upper - lower + 1; |
820 | } else if (incr == -1) { |
821 | trip_count = lower - upper + 1; |
822 | } else if (incr > 0) { |
823 | // upper-lower can exceed the limit of signed type |
824 | trip_count = (UT)(upper - lower) / incr + 1; |
825 | } else { |
826 | KMP_DEBUG_ASSERT(incr != 0); |
827 | trip_count = (UT)(lower - upper) / (-incr) + 1; |
828 | } |
829 | if (chunk < 1) |
830 | chunk = 1; |
831 | span = chunk * incr; |
832 | *p_st = span * nteams; |
833 | *p_lb = lower + (span * team_id); |
834 | *p_ub = *p_lb + span - incr; |
835 | if (p_last != NULL) { |
836 | KMP_DEBUG_ASSERT(chunk != 0); |
837 | *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams); |
838 | } |
839 | // Correct upper bound if needed |
840 | if (incr > 0) { |
841 | if (*p_ub < *p_lb) // overflow? |
842 | *p_ub = traits_t<T>::max_value; |
843 | if (*p_ub > upper) |
844 | *p_ub = upper; // tracker C73258 |
845 | } else { // incr < 0 |
846 | if (*p_ub > *p_lb) |
847 | *p_ub = traits_t<T>::min_value; |
848 | if (*p_ub < upper) |
849 | *p_ub = upper; // tracker C73258 |
850 | } |
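// For example, lower = 0, upper = 99, incr = 1, chunk = 10 and nteams = 4 give
// *p_st = 40 and first chunks [0,9], [10,19], [20,29], [30,39] for teams 0-3;
// each team advances by 40 for its next chunk, and team
// ((100 - 1) / 10) % 4 = 1 owns the final chunk [90,99] and gets *p_last.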
851 | #ifdef KMP_DEBUG |
852 | { |
853 | char *buff; |
854 | // create format specifiers before the debug output |
855 | buff = |
856 | __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d " |
857 | "iter=(%%%s, %%%s, %%%s) chunk %%%s\n" , |
858 | traits_t<T>::spec, traits_t<T>::spec, |
859 | traits_t<ST>::spec, traits_t<ST>::spec); |
860 | KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk)); |
861 | __kmp_str_free(&buff); |
862 | } |
863 | #endif |
864 | } |
865 | |
866 | //------------------------------------------------------------------------------ |
867 | extern "C" { |
868 | /*! |
869 | @ingroup WORK_SHARING |
870 | @param loc Source code location |
871 | @param gtid Global thread id of this thread |
872 | @param schedtype Scheduling type |
873 | @param plastiter Pointer to the "last iteration" flag |
874 | @param plower Pointer to the lower bound |
875 | @param pupper Pointer to the upper bound |
876 | @param pstride Pointer to the stride |
877 | @param incr Loop increment |
878 | @param chunk The chunk size |
879 | |
880 | Each of the four functions here is identical apart from the argument types. |
881 | |
882 | The functions compute the upper and lower bounds and stride to be used for the |
883 | set of iterations to be executed by the current thread from the statically |
884 | scheduled loop that is described by the initial values of the bounds, stride, |
885 | increment and chunk size. |
886 | |
887 | @{ |
888 | */ |
889 | void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, |
890 | kmp_int32 *plastiter, kmp_int32 *plower, |
891 | kmp_int32 *pupper, kmp_int32 *pstride, |
892 | kmp_int32 incr, kmp_int32 chunk) { |
893 | __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower, |
894 | pupper, pstride, incr, chunk |
895 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
896 | , |
897 | OMPT_GET_RETURN_ADDRESS(0) |
898 | #endif |
899 | ); |
900 | } |
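/* A minimal sketch of how compiler-generated code typically consumes these
   values for "#pragma omp for schedule(static)" (illustrative only; N, loc and
   body() are placeholders, and the code an actual compiler emits will differ):

     kmp_int32 last = 0, lower = 0, upper = N - 1, stride = 1;
     __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
                              &upper, &stride, 1, 1);
     for (kmp_int32 i = lower; i <= upper; ++i)
       body(i);
     __kmpc_for_static_fini(&loc, gtid);
*/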
901 | |
902 | /*! |
903 | See @ref __kmpc_for_static_init_4 |
904 | */ |
905 | void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid, |
906 | kmp_int32 schedtype, kmp_int32 *plastiter, |
907 | kmp_uint32 *plower, kmp_uint32 *pupper, |
908 | kmp_int32 *pstride, kmp_int32 incr, |
909 | kmp_int32 chunk) { |
910 | __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower, |
911 | pupper, pstride, incr, chunk |
912 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
913 | , |
914 | OMPT_GET_RETURN_ADDRESS(0) |
915 | #endif |
916 | ); |
917 | } |
918 | |
919 | /*! |
920 | See @ref __kmpc_for_static_init_4 |
921 | */ |
922 | void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, |
923 | kmp_int32 *plastiter, kmp_int64 *plower, |
924 | kmp_int64 *pupper, kmp_int64 *pstride, |
925 | kmp_int64 incr, kmp_int64 chunk) { |
926 | __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower, |
927 | pupper, pstride, incr, chunk |
928 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
929 | , |
930 | OMPT_GET_RETURN_ADDRESS(0) |
931 | #endif |
932 | ); |
933 | } |
934 | |
935 | /*! |
936 | See @ref __kmpc_for_static_init_4 |
937 | */ |
938 | void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid, |
939 | kmp_int32 schedtype, kmp_int32 *plastiter, |
940 | kmp_uint64 *plower, kmp_uint64 *pupper, |
941 | kmp_int64 *pstride, kmp_int64 incr, |
942 | kmp_int64 chunk) { |
943 | __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower, |
944 | pupper, pstride, incr, chunk |
945 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
946 | , |
947 | OMPT_GET_RETURN_ADDRESS(0) |
948 | #endif |
949 | ); |
950 | } |
951 | /*! |
952 | @} |
953 | */ |
954 | |
955 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
956 | #define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0) |
957 | #else |
958 | #define OMPT_CODEPTR_ARG |
959 | #endif |
960 | |
961 | /*! |
962 | @ingroup WORK_SHARING |
963 | @param loc Source code location |
964 | @param gtid Global thread id of this thread |
965 | @param schedule Scheduling type for the parallel loop |
966 | @param plastiter Pointer to the "last iteration" flag |
967 | @param plower Pointer to the lower bound |
968 | @param pupper Pointer to the upper bound of loop chunk |
969 | @param pupperD Pointer to the upper bound of dist_chunk |
970 | @param pstride Pointer to the stride for parallel loop |
971 | @param incr Loop increment |
972 | @param chunk The chunk size for the parallel loop |
973 | |
974 | Each of the four functions here is identical apart from the argument types. |
975 | |
976 | The functions compute the upper and lower bounds and strides to be used for the |
977 | set of iterations to be executed by the current thread from the statically |
978 | scheduled loop that is described by the initial values of the bounds, strides, |
979 | increment and chunks for parallel loop and distribute constructs. |
980 | |
981 | @{ |
982 | */ |
983 | void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid, |
984 | kmp_int32 schedule, kmp_int32 *plastiter, |
985 | kmp_int32 *plower, kmp_int32 *pupper, |
986 | kmp_int32 *pupperD, kmp_int32 *pstride, |
987 | kmp_int32 incr, kmp_int32 chunk) { |
988 | __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower, |
989 | pupper, pupperD, pstride, incr, |
990 | chunk OMPT_CODEPTR_ARG); |
991 | } |
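/* A minimal sketch of a typical call sequence for
   "#pragma omp distribute parallel for dist_schedule(static) schedule(static)"
   inside a teams region (illustrative only; N, loc and body() are
   placeholders):

     kmp_int32 last = 0, lower = 0, upper = N - 1, upperD = N - 1, stride = 1;
     __kmpc_dist_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
                                   &upper, &upperD, &stride, 1, 1);
     // lower/upper now bound this thread's share of the team's portion;
     // upperD is the upper bound of the portion assigned to the whole team.
     for (kmp_int32 i = lower; i <= upper; ++i)
       body(i);
*/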
992 | |
993 | /*! |
994 | See @ref __kmpc_dist_for_static_init_4 |
995 | */ |
996 | void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid, |
997 | kmp_int32 schedule, kmp_int32 *plastiter, |
998 | kmp_uint32 *plower, kmp_uint32 *pupper, |
999 | kmp_uint32 *pupperD, kmp_int32 *pstride, |
1000 | kmp_int32 incr, kmp_int32 chunk) { |
1001 | __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower, |
1002 | pupper, pupperD, pstride, incr, |
1003 | chunk OMPT_CODEPTR_ARG); |
1004 | } |
1005 | |
1006 | /*! |
1007 | See @ref __kmpc_dist_for_static_init_4 |
1008 | */ |
1009 | void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid, |
1010 | kmp_int32 schedule, kmp_int32 *plastiter, |
1011 | kmp_int64 *plower, kmp_int64 *pupper, |
1012 | kmp_int64 *pupperD, kmp_int64 *pstride, |
1013 | kmp_int64 incr, kmp_int64 chunk) { |
1014 | __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower, |
1015 | pupper, pupperD, pstride, incr, |
1016 | chunk OMPT_CODEPTR_ARG); |
1017 | } |
1018 | |
1019 | /*! |
1020 | See @ref __kmpc_dist_for_static_init_4 |
1021 | */ |
1022 | void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid, |
1023 | kmp_int32 schedule, kmp_int32 *plastiter, |
1024 | kmp_uint64 *plower, kmp_uint64 *pupper, |
1025 | kmp_uint64 *pupperD, kmp_int64 *pstride, |
1026 | kmp_int64 incr, kmp_int64 chunk) { |
1027 | __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower, |
1028 | pupper, pupperD, pstride, incr, |
1029 | chunk OMPT_CODEPTR_ARG); |
1030 | } |
1031 | /*! |
1032 | @} |
1033 | */ |
1034 | |
1035 | //------------------------------------------------------------------------------ |
1036 | // Auxiliary routines for Distribute Parallel Loop construct implementation |
1037 | // Transfer call to template< type T > |
1038 | // __kmp_team_static_init( ident_t *loc, int gtid, |
1039 | // int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk ) |
1040 | |
1041 | /*! |
1042 | @ingroup WORK_SHARING |
1043 | @{ |
1044 | @param loc Source location |
1045 | @param gtid Global thread id |
1046 | @param p_last Pointer to the last iteration flag |
1047 | @param p_lb Pointer to the lower bound |
1048 | @param p_ub Pointer to the upper bound |
1049 | @param p_st Step (or increment, if you prefer) |
1050 | @param incr Loop increment |
1051 | @param chunk The chunk size to block with |
1052 | |
1053 | The functions compute the upper and lower bounds and stride to be used for the |
1054 | set of iterations to be executed by the current team from the statically |
1055 | scheduled loop that is described by the initial values of the bounds, stride, |
1056 | increment and chunk for the distribute construct as part of a composite distribute |
1057 | parallel loop construct. These functions are all identical apart from the types |
1058 | of the arguments. |
1059 | */ |
1060 | |
1061 | void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, |
1062 | kmp_int32 *p_lb, kmp_int32 *p_ub, |
1063 | kmp_int32 *p_st, kmp_int32 incr, |
1064 | kmp_int32 chunk) { |
1065 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
1066 | __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, |
1067 | chunk); |
1068 | } |
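/* A minimal sketch of a typical use for
   "#pragma omp distribute dist_schedule(static, 16)" (illustrative only; N and
   loc are placeholders):

     kmp_int32 last = 0, lb = 0, ub = N - 1, st = 1;
     __kmpc_team_static_init_4(&loc, gtid, &last, &lb, &ub, &st, 1, 16);
     // The team then walks its chunks [lb, ub], [lb + st, ub + st], ...
     // until lb steps past the original upper bound.
*/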
1069 | |
1070 | /*! |
1071 | See @ref __kmpc_team_static_init_4 |
1072 | */ |
1073 | void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, |
1074 | kmp_uint32 *p_lb, kmp_uint32 *p_ub, |
1075 | kmp_int32 *p_st, kmp_int32 incr, |
1076 | kmp_int32 chunk) { |
1077 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
1078 | __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, |
1079 | chunk); |
1080 | } |
1081 | |
1082 | /*! |
1083 | See @ref __kmpc_team_static_init_4 |
1084 | */ |
1085 | void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, |
1086 | kmp_int64 *p_lb, kmp_int64 *p_ub, |
1087 | kmp_int64 *p_st, kmp_int64 incr, |
1088 | kmp_int64 chunk) { |
1089 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
1090 | __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, |
1091 | chunk); |
1092 | } |
1093 | |
1094 | /*! |
1095 | See @ref __kmpc_team_static_init_4 |
1096 | */ |
1097 | void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, |
1098 | kmp_uint64 *p_lb, kmp_uint64 *p_ub, |
1099 | kmp_int64 *p_st, kmp_int64 incr, |
1100 | kmp_int64 chunk) { |
1101 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
1102 | __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, |
1103 | chunk); |
1104 | } |
1105 | /*! |
1106 | @} |
1107 | */ |
1108 | |
1109 | } // extern "C" |
1110 | |