/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
   it may change values between parallel regions. __kmp_max_nth
   is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat) \
  { \
    kmp_int64 t; \
    kmp_int64 u = (kmp_int64)(*pupper); \
    kmp_int64 l = (kmp_int64)(*plower); \
    kmp_int64 i = (kmp_int64)incr; \
    if (i == 1) { \
      t = u - l + 1; \
    } else if (i == -1) { \
      t = l - u + 1; \
    } else if (i > 0) { \
      t = (u - l) / i + 1; \
    } else { \
      KMP_DEBUG_ASSERT(i != 0); \
      t = (l - u) / (-i) + 1; \
    } \
    KMP_COUNT_VALUE(stat, t); \
    KMP_POP_PARTITIONED_TIMER(); \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

#if USE_ITT_BUILD || defined KMP_DEBUG
static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}
#endif

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  // Clear monotonic/nonmonotonic bits (ignore it)
  schedtype = SCHEDULE_WITHOUT_MODIFIERS(schedtype);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop_static;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop_static;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
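  // Illustrative worked example (clarifying note, not in the original
  // source): lower=0, upper=10, incr=3 covers the iterations 0, 3, 6, 9,
  // so trip_count = (10 - 0) / 3 + 1 = 4.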

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      KMP_DEBUG_ASSERT(nth != 0);
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
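        // Illustrative example (clarifying note, not in the original
        // source): with trip_count=10 and nth=4, small_chunk=2 and
        // extras=2, so threads 0..3 receive 3, 3, 2, 2 iterations.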
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    KMP_DEBUG_ASSERT(chunk != 0);
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    KMP_DEBUG_ASSERT(nth != 0);
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
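    // Clarifying note (not in the original source): the masking below rounds
    // span up to the next multiple of chunk, which is only exact when chunk
    // is a power of two (e.g. the simd width for schedule(simd:static)).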
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL) {
      KMP_DEBUG_ASSERT(chunk != 0);
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    }
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      KMP_DEBUG_ASSERT(nth != 0);
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
  if (ompt_enabled.ompt_callback_dispatch) {
    ompt_dispatch_t dispatch_type;
    ompt_data_t instance = ompt_data_none;
    ompt_dispatch_chunk_t dispatch_chunk;
    if (ompt_work_type == ompt_work_sections) {
      dispatch_type = ompt_dispatch_section;
      instance.ptr = codeptr;
    } else {
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
      dispatch_type = (ompt_work_type == ompt_work_distribute)
                          ? ompt_dispatch_distribute_chunk
                          : ompt_dispatch_ws_loop_chunk;
      instance.ptr = &dispatch_chunk;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
        instance);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                       ,
                                       void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0 - compile-time check
      //   for(i=10;i<0;--i) // incr < 0 - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  // skip optional serialized teams to prevent this from using the wrong teams
  // information when called after __kmp_serialized_parallel
  // TODO: make __kmp_serialized_parallel eventually call __kmp_fork_in_teams
  // to address this edge case
  while (team->t.t_parent && team->t.t_serialized)
    team = team->t.t_parent;
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only primary threads of some teams get single iteration, other threads
    // get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    KMP_DEBUG_ASSERT(nteams != 0);
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      KMP_DEBUG_ASSERT(incr != 0);
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        KMP_DEBUG_ASSERT(nth != 0);
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL) {
        KMP_DEBUG_ASSERT(chunk != 0);
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      }
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
    if (ompt_enabled.ompt_callback_dispatch) {
      ompt_data_t instance = ompt_data_none;
      ompt_dispatch_chunk_t dispatch_chunk;
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
      instance.ptr = &dispatch_chunk;
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(task_info->task_data),
          ompt_dispatch_distribute_chunk, instance);
    }
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0 - compile-time check
      //   for(i=10;i<0;--i) // incr < 0 - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL) {
    KMP_DEBUG_ASSERT(chunk != 0);
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  }
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
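
// Illustrative usage sketch (not part of this file; names other than the
// runtime entry points are hypothetical). For
//   #pragma omp for schedule(static)
//   for (int i = 0; i <= 99; ++i) body(i);
// a compiler typically emits code along these lines in each thread:
//   kmp_int32 last = 0, lower = 0, upper = 99, stride = 1;
//   __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
//                            &upper, &stride, /*incr=*/1, /*chunk=*/0);
//   for (kmp_int32 i = lower; i <= upper; ++i)
//     body(i);
//   __kmpc_for_static_fini(&loc, gtid);
// On return, [lower, upper] holds the calling thread's contiguous block of
// iterations; with unchunked static scheduling each thread makes one pass.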

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@}
*/

#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of the loop chunk
@param pupperD Pointer to the upper bound of the dist_chunk
@param pstride Pointer to the stride for the parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for the parallel loop and distribute constructs.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}
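
// Illustrative usage sketch (not part of this file; everything except the
// runtime entry point is hypothetical). For a composite
//   #pragma omp distribute parallel for dist_schedule(static)
// loop over 0..99, each thread of each team might call:
//   kmp_int32 last = 0, lower = 0, upper = 99, upperD = 99, stride = 1;
//   __kmpc_dist_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
//                                 &upper, &upperD, &stride, 1, 0);
// On return, upperD bounds the team's portion of the iteration space, while
// [lower, upper] is the calling thread's share of that portion.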

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last Pointer to the last iteration flag
@param p_lb Pointer to the lower bound
@param p_ub Pointer to the upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of a composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}
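
// Illustrative usage sketch (not part of this file; names other than the
// runtime entry point are hypothetical). For
//   #pragma omp distribute dist_schedule(static, 8)
// over iterations 0..99, the lowered code calls this once and then strides
// over the team's chunks:
//   kmp_int32 last = 0, lb = 0, ub = 99, st = 1;
//   __kmpc_team_static_init_4(&loc, gtid, &last, &lb, &ub, &st, 1, 8);
//   // [lb, ub] is this team's first chunk; its later chunks begin at
//   // lb + st, lb + 2 * st, ... (st = chunk * incr * nteams).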

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"