| 1 | #ifndef KMP_STATS_H |
| 2 | #define KMP_STATS_H |
| 3 | |
| 4 | /** @file kmp_stats.h |
| 5 | * Functions for collecting statistics. |
| 6 | */ |
| 7 | |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 11 | // See https://llvm.org/LICENSE.txt for license information. |
| 12 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 13 | // |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #include "kmp_config.h" |
| 17 | #include "kmp_debug.h" |
| 18 | |
| 19 | #if KMP_STATS_ENABLED |
| 20 | /* Statistics accumulator. |
| 21 | Accumulates number of samples and computes min, max, mean, standard deviation |
| 22 | on the fly. |
| 23 | |
| 24 | Online variance calculation algorithm from |
| 25 | http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm |
| 26 | */ |
| 27 | |
| 28 | #include "kmp_stats_timing.h" |
| 29 | #include <limits> |
| 30 | #include <math.h> |
| 31 | #include <new> // placement new |
| 32 | #include <stdint.h> |
| 33 | #include <string> |
| 34 | #include <vector> |
| 35 | |
| 36 | /* Enable developer statistics here if you want them. They are more detailed |
| 37 | than is useful for application characterisation and are intended for the |
| 38 | runtime library developer. */ |
| 39 | #define KMP_DEVELOPER_STATS 0 |
| 40 | |
| 41 | /* Enable/Disable histogram output */ |
| 42 | #define KMP_STATS_HIST 0 |
| 43 | |
/*!
 * @ingroup STATS_GATHERING
 * \brief Bit flags describing a statistic (timer or counter).
 *
 * Every enumerator occupies its own bit, so several flags can be OR-ed
 * together into one flags word.
 */
enum stats_flags_e {
  noTotal = 0x01,      //!< do not show a TOTAL_aggregation for this statistic
  onlyInMaster = 0x02, //!< statistic is valid only for primary thread
  noUnits = 0x04,      //!< statistic doesn't need units printed next to it
  notInMaster = 0x08,  //!< statistic is valid only for non-primary threads
  logEvent = 0x10      //!< statistic can be logged on the event timeline when
                       //!  KMP_STATS_EVENTS is on (valid only for timers)
};
| 57 | |
/*!
 * @ingroup STATS_GATHERING
 * \brief The states a thread can be in.
 *
 * The numeric values are spelled out; they are the same consecutive values,
 * starting at zero, that implicit numbering would assign.
 */
enum stats_state_e {
  IDLE = 0,
  SERIAL_REGION = 1,
  FORK_JOIN_BARRIER = 2,
  PLAIN_BARRIER = 3,
  TASKWAIT = 4,
  TASKYIELD = 5,
  TASKGROUP = 6,
  IMPLICIT_TASK = 7,
  EXPLICIT_TASK = 8,
  TEAMS_REGION = 9
};
| 75 | |
/*!
 * \brief The list of counters. Add new counters here, inside
 * KMP_FOREACH_COUNTER().
 *
 * @param macro a user defined macro that takes three arguments -
 *              macro(COUNTER_NAME, flags, arg)
 * @param arg   a user defined argument that is forwarded unchanged to every
 *              invocation of the user defined macro
 *
 * \details A counter counts the occurrence of some event. Each thread
 * accumulates its own count; at the end of execution the counts are aggregated
 * treating each thread as a separate measurement (unless onlyInMaster is set,
 * in which case there is only a single measurement). The min, mean and max are
 * therefore the values for the threads. Adding the counter to this list and
 * then placing the counting macro at the point you want to count is all that
 * is needed (the original note names KMP_BLOCK_COUNTER(name) for this --
 * NOTE(review): confirm the exact macro name). All of the tables and printing
 * are generated from this list. The order of the entries determines the order
 * of whatever the user macro generates.
 * Format is "macro(name, flags, arg)"
 *
 * @ingroup STATS_GATHERING
 */
// clang-format off
#define KMP_FOREACH_COUNTER(macro, arg)                                        \
  macro(OMP_PARALLEL, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \
  macro(OMP_NESTED_PARALLEL,   0, arg)                                         \
  macro(OMP_LOOP_STATIC,       0, arg)                                         \
  macro(OMP_LOOP_STATIC_STEAL, 0, arg)                                         \
  macro(OMP_LOOP_DYNAMIC,      0, arg)                                         \
  macro(OMP_DISTRIBUTE,        0, arg)                                         \
  macro(OMP_BARRIER,           0, arg)                                         \
  macro(OMP_CRITICAL,          0, arg)                                         \
  macro(OMP_SINGLE,            0, arg)                                         \
  macro(OMP_SECTIONS,          0, arg)                                         \
  macro(OMP_MASTER,            0, arg)                                         \
  macro(OMP_MASKED,            0, arg)                                         \
  macro(OMP_TEAMS,             0, arg)                                         \
  macro(OMP_set_lock,          0, arg)                                         \
  macro(OMP_test_lock,         0, arg)                                         \
  macro(REDUCE_wait,           0, arg)                                         \
  macro(REDUCE_nowait,         0, arg)                                         \
  macro(OMP_TASKYIELD,         0, arg)                                         \
  macro(OMP_TASKLOOP,          0, arg)                                         \
  macro(TASK_executed,         0, arg)                                         \
  macro(TASK_cancelled,        0, arg)                                         \
  macro(TASK_stolen,           0, arg)
// clang-format on
| 119 | |
/*!
 * \brief The list of timers. Add new timers here, inside KMP_FOREACH_TIMER().
 *
 * @param macro a user defined macro that takes three arguments -
 *              macro(TIMER_NAME, flags, arg)
 * @param arg   a user defined argument that is forwarded unchanged to every
 *              invocation of the user defined macro
 *
 * \details A timer collects multiple samples of some count in each thread and
 * then finally aggregates all of the samples from all of the threads. For most
 * timers the printing code also provides an aggregation over the thread
 * totals; these are printed as TOTAL_foo. The count is normally a time (in
 * ticks), hence the name "timer", but it can be any value, so the same
 * mechanism is used for "number of arguments passed to fork" as well. For
 * timers the threads are not significant; it is the individual observations
 * that count, so the statistics are at that level.
 * The list ends by expanding KMP_FOREACH_DEVELOPER_TIMER(macro, arg), so any
 * developer timers follow the entries written out here.
 * Format is "macro(name, flags, arg)"
 *
 * @ingroup STATS_GATHERING2
 */
// clang-format off
#define KMP_FOREACH_TIMER(macro, arg)                                          \
  macro(OMP_worker_thread_life,      stats_flags_e::logEvent, arg)             \
  macro(OMP_parallel,                stats_flags_e::logEvent, arg)             \
  macro(OMP_parallel_overhead,       stats_flags_e::logEvent, arg)             \
  macro(OMP_teams,                   stats_flags_e::logEvent, arg)             \
  macro(OMP_teams_overhead,          stats_flags_e::logEvent, arg)             \
  macro(OMP_loop_static,             0, arg)                                   \
  macro(OMP_loop_static_scheduling,  0, arg)                                   \
  macro(OMP_loop_dynamic,            0, arg)                                   \
  macro(OMP_loop_dynamic_scheduling, 0, arg)                                   \
  macro(OMP_distribute,              0, arg)                                   \
  macro(OMP_distribute_scheduling,   0, arg)                                   \
  macro(OMP_critical,                0, arg)                                   \
  macro(OMP_critical_wait,           0, arg)                                   \
  macro(OMP_single,                  0, arg)                                   \
  macro(OMP_sections,                0, arg)                                   \
  macro(OMP_sections_overhead,       0, arg)                                   \
  macro(OMP_master,                  0, arg)                                   \
  macro(OMP_masked,                  0, arg)                                   \
  macro(OMP_task_immediate,          0, arg)                                   \
  macro(OMP_task_taskwait,           0, arg)                                   \
  macro(OMP_task_taskyield,          0, arg)                                   \
  macro(OMP_task_taskgroup,          0, arg)                                   \
  macro(OMP_task_join_bar,           0, arg)                                   \
  macro(OMP_task_plain_bar,          0, arg)                                   \
  macro(OMP_taskloop_scheduling,     0, arg)                                   \
  macro(OMP_plain_barrier,           stats_flags_e::logEvent, arg)             \
  macro(OMP_idle,                    stats_flags_e::logEvent, arg)             \
  macro(OMP_fork_barrier,            stats_flags_e::logEvent, arg)             \
  macro(OMP_join_barrier,            stats_flags_e::logEvent, arg)             \
  macro(OMP_serial,                  stats_flags_e::logEvent, arg)             \
  macro(OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(OMP_loop_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(OMP_loop_static_total_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(OMP_loop_dynamic_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(OMP_loop_dynamic_total_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(OMP_distribute_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
// clang-format on
| 186 | |
| 187 | // OMP_worker_thread_life -- Time from thread becoming an OpenMP thread (either |
| 188 | // initializing OpenMP or being created by a primary |
| 189 | // thread) until the thread is destroyed |
| 190 | // OMP_parallel -- Time thread spends executing work directly |
| 191 | // within a #pragma omp parallel |
| 192 | // OMP_parallel_overhead -- Time thread spends setting up a parallel region |
| 193 | // OMP_loop_static -- Time thread spends executing loop iterations from |
| 194 | // a statically scheduled loop |
| 195 | // OMP_loop_static_scheduling -- Time thread spends scheduling loop iterations |
| 196 | // from a statically scheduled loop |
| 197 | // OMP_loop_dynamic -- Time thread spends executing loop iterations from |
| 198 | // a dynamically scheduled loop |
| 199 | // OMP_loop_dynamic_scheduling -- Time thread spends scheduling loop iterations |
| 200 | // from a dynamically scheduled loop |
| 201 | // OMP_critical -- Time thread spends executing critical section |
| 202 | // OMP_critical_wait -- Time thread spends waiting to enter |
| 203 | // a critical section |
| 204 | // OMP_single -- Time spent executing a "single" region |
| 205 | // OMP_master -- Time spent executing a "master" region |
| 206 | // OMP_masked -- Time spent executing a "masked" region |
| 207 | // OMP_task_immediate -- Time spent executing non-deferred tasks |
| 208 | // OMP_task_taskwait -- Time spent executing tasks inside a taskwait |
| 209 | // construct |
| 210 | // OMP_task_taskyield -- Time spent executing tasks inside a taskyield |
| 211 | // construct |
| 212 | // OMP_task_taskgroup -- Time spent executing tasks inside a taskgroup |
| 213 | // construct |
| 214 | // OMP_task_join_bar -- Time spent executing tasks inside a join barrier |
| 215 | // OMP_task_plain_bar -- Time spent executing tasks inside a barrier |
| 216 | // construct |
| 217 | // OMP_taskloop_scheduling -- Time spent scheduling tasks inside a taskloop |
| 218 | // construct |
| 219 | // OMP_plain_barrier -- Time spent in a #pragma omp barrier construct or |
| 220 | // inside implicit barrier at end of worksharing |
| 221 | // construct |
| 222 | // OMP_idle -- Time worker threads spend waiting for next |
| 223 | // parallel region |
| 224 | // OMP_fork_barrier -- Time spent in the fork barrier surrounding a |
| 225 | // parallel region |
| 226 | // OMP_join_barrier -- Time spent in the join barrier surrounding a |
| 227 | // parallel region |
| 228 | // OMP_serial -- Time thread zero spends executing serial code |
| 229 | // OMP_set_numthreads -- Values passed to omp_set_num_threads |
| 230 | // OMP_PARALLEL_args -- Number of arguments passed to a parallel region |
| 231 | // OMP_loop_static_iterations -- Number of iterations thread is assigned for |
| 232 | // statically scheduled loops |
| 233 | // OMP_loop_dynamic_iterations -- Number of iterations thread is assigned for |
| 234 | // dynamically scheduled loops |
| 235 | |
#if (KMP_DEVELOPER_STATS)
// Timers which are of interest to runtime library developers, not end users.
// These have to be explicitly enabled in addition to the other stats.
//
// Descriptions carried over for entries that are present in the list below:
//   KMP_end_split_barrier -- time in __kmp_end_split_barrier
//   KMP_setup_icv_copy    -- time in __kmp_setup_icv_copy
//   KMP_linear_gather     -- time in __kmp_linear_barrier_gather
//   KMP_linear_release    -- time in __kmp_linear_barrier_release
//   KMP_tree_gather       -- time in __kmp_tree_barrier_gather
//   KMP_tree_release      -- time in __kmp_tree_barrier_release
//   KMP_hyper_gather      -- time in __kmp_hyper_barrier_gather
//   KMP_hyper_release     -- time in __kmp_hyper_barrier_release
//   KMP_dist_gather       -- time in __kmp_dist_barrier_gather
//   KMP_dist_release      -- time in __kmp_dist_barrier_release
//
// NOTE(review): the earlier comment also described the following names, which
// have no entry in the list below -- confirm whether they are stale:
//   KMP_fork_barrier -- time in __kmp_fork_barrier
//   KMP_join_barrier -- time in __kmp_join_barrier
//   KMP_barrier      -- time in __kmp_barrier
//   KMP_icv_copy     -- start/stop timer for any ICV copying
// NOTE(review): KMP_fork_call, KMP_join_call, KMP_hier_gather,
// KMP_hier_release, USER_resume, USER_suspend, USER_mwait, KMP_allocate_team,
// USER_icv_copy, FOR_static_steal_stolen and FOR_static_steal_chunks are
// listed below but have no description here.
// clang-format off
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)                                \
  macro(KMP_fork_call,         0, arg)                                         \
  macro(KMP_join_call,         0, arg)                                         \
  macro(KMP_end_split_barrier, 0, arg)                                         \
  macro(KMP_hier_gather,       0, arg)                                         \
  macro(KMP_hier_release,      0, arg)                                         \
  macro(KMP_hyper_gather,      0, arg)                                         \
  macro(KMP_hyper_release,     0, arg)                                         \
  macro(KMP_dist_gather,       0, arg)                                         \
  macro(KMP_dist_release,      0, arg)                                         \
  macro(KMP_linear_gather,     0, arg)                                         \
  macro(KMP_linear_release,    0, arg)                                         \
  macro(KMP_tree_gather,       0, arg)                                         \
  macro(KMP_tree_release,      0, arg)                                         \
  macro(USER_resume,           0, arg)                                         \
  macro(USER_suspend,          0, arg)                                         \
  macro(USER_mwait,            0, arg)                                         \
  macro(KMP_allocate_team,     0, arg)                                         \
  macro(KMP_setup_icv_copy,    0, arg)                                         \
  macro(USER_icv_copy,         0, arg)                                         \
  macro(FOR_static_steal_stolen, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(FOR_static_steal_chunks, stats_flags_e::noUnits | stats_flags_e::noTotal, arg)
#else
// Developer statistics are disabled: the list expands to nothing.
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
#endif
// clang-format on
| 283 | |
/*!
 * \brief The list of explicit timers; it simply forwards to
 * KMP_FOREACH_TIMER(), so every timer is also an explicit timer.
 *
 * @param macro a user defined macro that takes three arguments -
 *              macro(TIMER_NAME, flags, arg)
 * @param arg   a user defined argument to send to the user defined macro
 *
 * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE
 * BAD THINGS WILL HAPPEN!
 *
 * \details Explicit timers are ones where we need to allocate a timer itself
 * (as well as the accumulated timing statistics). We allocate these on a
 * per-thread basis, and explicitly start and stop them. Block timers just
 * allocate the timer itself on the stack, and use the destructor to notice
 * block exit; they don't need to be defined here. The name here should be the
 * same as that of a timer above.
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)                                 \
  KMP_FOREACH_TIMER(macro, arg)
| 304 | |
| 305 | #define ENUMERATE(name, ignore, prefix) prefix##name, |
| 306 | enum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) TIMER_LAST }; |
| 307 | |
| 308 | enum explicit_timer_e { |
| 309 | KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) EXPLICIT_TIMER_LAST |
| 310 | }; |
| 311 | |
| 312 | enum counter_e { KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) COUNTER_LAST }; |
| 313 | #undef ENUMERATE |
| 314 | |
| 315 | /* |
| 316 | * A logarithmic histogram. It accumulates the number of values in each power of |
| 317 | * ten bin. So 1<=x<10, 10<=x<100, ... |
| 318 | * Mostly useful where we have some big outliers and want to see information |
| 319 | * about them. |
| 320 | */ |
| 321 | class logHistogram { |
| 322 | enum { |
| 323 | numBins = 31, /* Number of powers of 10. If this changes you need to change |
| 324 | * the initializer for binMax */ |
| 325 | |
| 326 | /* |
| 327 | * If you want to use this to analyse values that may be less than 1, (for |
| 328 | * instance times in s), then the logOffset gives you negative powers. |
| 329 | * In our case here, we're just looking at times in ticks, or counts, so we |
| 330 | * can never see values with magnitude < 1 (other than zero), so we can set |
| 331 | * it to 0. As above change the initializer if you change this. |
| 332 | */ |
| 333 | logOffset = 0 |
| 334 | }; |
| 335 | uint32_t KMP_ALIGN_CACHE zeroCount; |
| 336 | struct { |
| 337 | uint32_t count; |
| 338 | double total; |
| 339 | } bins[numBins]; |
| 340 | |
| 341 | static double binMax[numBins]; |
| 342 | |
| 343 | #ifdef KMP_DEBUG |
| 344 | uint64_t _total; |
| 345 | |
| 346 | void check() const { |
| 347 | uint64_t t = zeroCount; |
| 348 | for (int i = 0; i < numBins; i++) |
| 349 | t += bins[i].count; |
| 350 | KMP_DEBUG_ASSERT(t == _total); |
| 351 | } |
| 352 | #else |
| 353 | void check() const {} |
| 354 | #endif |
| 355 | |
| 356 | public: |
| 357 | logHistogram() { reset(); } |
| 358 | |
| 359 | logHistogram(logHistogram const &o) { |
| 360 | for (int i = 0; i < numBins; i++) |
| 361 | bins[i] = o.bins[i]; |
| 362 | #ifdef KMP_DEBUG |
| 363 | _total = o._total; |
| 364 | #endif |
| 365 | } |
| 366 | |
| 367 | void reset() { |
| 368 | zeroCount = 0; |
| 369 | for (int i = 0; i < numBins; i++) { |
| 370 | bins[i].count = 0; |
| 371 | bins[i].total = 0; |
| 372 | } |
| 373 | |
| 374 | #ifdef KMP_DEBUG |
| 375 | _total = 0; |
| 376 | #endif |
| 377 | } |
| 378 | uint32_t count(int b) const { return bins[b + logOffset].count; } |
| 379 | double total(int b) const { return bins[b + logOffset].total; } |
| 380 | static uint32_t findBin(double sample); |
| 381 | |
| 382 | logHistogram &operator+=(logHistogram const &o) { |
| 383 | zeroCount += o.zeroCount; |
| 384 | for (int i = 0; i < numBins; i++) { |
| 385 | bins[i].count += o.bins[i].count; |
| 386 | bins[i].total += o.bins[i].total; |
| 387 | } |
| 388 | #ifdef KMP_DEBUG |
| 389 | _total += o._total; |
| 390 | check(); |
| 391 | #endif |
| 392 | |
| 393 | return *this; |
| 394 | } |
| 395 | |
| 396 | void addSample(double sample); |
| 397 | int minBin() const; |
| 398 | int maxBin() const; |
| 399 | |
| 400 | std::string format(char) const; |
| 401 | }; |
| 402 | |
| 403 | class statistic { |
| 404 | double KMP_ALIGN_CACHE minVal; |
| 405 | double maxVal; |
| 406 | double meanVal; |
| 407 | double m2; |
| 408 | uint64_t sampleCount; |
| 409 | double offset; |
| 410 | bool collectingHist; |
| 411 | logHistogram hist; |
| 412 | |
| 413 | public: |
| 414 | statistic(bool doHist = bool(KMP_STATS_HIST)) { |
| 415 | reset(); |
| 416 | collectingHist = doHist; |
| 417 | } |
| 418 | statistic(statistic const &o) |
| 419 | : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2), |
| 420 | sampleCount(o.sampleCount), offset(o.offset), |
| 421 | collectingHist(o.collectingHist), hist(o.hist) {} |
| 422 | statistic(double minv, double maxv, double meanv, uint64_t sc, double sd) |
| 423 | : minVal(minv), maxVal(maxv), meanVal(meanv), m2(sd * sd * sc), |
| 424 | sampleCount(sc), offset(0.0), collectingHist(false) {} |
| 425 | bool haveHist() const { return collectingHist; } |
| 426 | double getMin() const { return minVal; } |
| 427 | double getMean() const { return meanVal; } |
| 428 | double getMax() const { return maxVal; } |
| 429 | uint64_t getCount() const { return sampleCount; } |
| 430 | double getSD() const { return sqrt(m2 / sampleCount); } |
| 431 | double getTotal() const { return sampleCount * meanVal; } |
| 432 | logHistogram const *getHist() const { return &hist; } |
| 433 | void setOffset(double d) { offset = d; } |
| 434 | |
| 435 | void reset() { |
| 436 | minVal = (std::numeric_limits<double>::max)(); |
| 437 | maxVal = -minVal; |
| 438 | meanVal = 0.0; |
| 439 | m2 = 0.0; |
| 440 | sampleCount = 0; |
| 441 | offset = 0.0; |
| 442 | hist.reset(); |
| 443 | } |
| 444 | void addSample(double sample); |
| 445 | void scale(double factor); |
| 446 | void scaleDown(double f) { scale(1. / f); } |
| 447 | void forceCount(uint64_t count) { sampleCount = count; } |
| 448 | statistic &operator+=(statistic const &other); |
| 449 | |
| 450 | std::string format(char unit, bool total = false) const; |
| 451 | std::string formatHist(char unit) const { return hist.format(unit); } |
| 452 | }; |
| 453 | |
// One table row describing a statistic: its printable name and its flags
// (tested elsewhere in this file against stats_flags_e bits).
// The member order is kept as-is so positional initializers stay valid.
struct statInfo {
  char const *name; // name of the statistic
  uint32_t flags;   // OR-ed stats_flags_e bits
};
| 458 | |
| 459 | class timeStat : public statistic { |
| 460 | static statInfo timerInfo[]; |
| 461 | |
| 462 | public: |
| 463 | timeStat() : statistic() {} |
| 464 | static const char *name(timer_e e) { return timerInfo[e].name; } |
| 465 | static bool noTotal(timer_e e) { |
| 466 | return timerInfo[e].flags & stats_flags_e::noTotal; |
| 467 | } |
| 468 | static bool masterOnly(timer_e e) { |
| 469 | return timerInfo[e].flags & stats_flags_e::onlyInMaster; |
| 470 | } |
| 471 | static bool workerOnly(timer_e e) { |
| 472 | return timerInfo[e].flags & stats_flags_e::notInMaster; |
| 473 | } |
| 474 | static bool noUnits(timer_e e) { |
| 475 | return timerInfo[e].flags & stats_flags_e::noUnits; |
| 476 | } |
| 477 | static bool logEvent(timer_e e) { |
| 478 | return timerInfo[e].flags & stats_flags_e::logEvent; |
| 479 | } |
| 480 | static void clearEventFlags() { |
| 481 | for (int i = 0; i < TIMER_LAST; i++) { |
| 482 | timerInfo[i].flags &= (~(stats_flags_e::logEvent)); |
| 483 | } |
| 484 | } |
| 485 | }; |
| 486 | |
| 487 | // Where we need explicitly to start and end the timer, this version can be used |
| 488 | // Since these timers normally aren't nicely scoped, so don't have a good place |
| 489 | // to live on the stack of the thread, they're more work to use. |
| 490 | class explicitTimer { |
| 491 | timeStat *stat; |
| 492 | timer_e timerEnumValue; |
| 493 | tsc_tick_count startTime; |
| 494 | tsc_tick_count pauseStartTime; |
| 495 | tsc_tick_count::tsc_interval_t totalPauseTime; |
| 496 | |
| 497 | public: |
| 498 | explicitTimer(timeStat *s, timer_e te) |
| 499 | : stat(s), timerEnumValue(te), startTime(), pauseStartTime(0), |
| 500 | totalPauseTime() {} |
| 501 | |
| 502 | // void setStat(timeStat *s) { stat = s; } |
| 503 | void start(tsc_tick_count tick); |
| 504 | void pause(tsc_tick_count tick) { pauseStartTime = tick; } |
| 505 | void resume(tsc_tick_count tick) { |
| 506 | totalPauseTime += (tick - pauseStartTime); |
| 507 | } |
| 508 | void stop(tsc_tick_count tick, kmp_stats_list *stats_ptr = nullptr); |
| 509 | void reset() { |
| 510 | startTime = 0; |
| 511 | pauseStartTime = 0; |
| 512 | totalPauseTime = 0; |
| 513 | } |
| 514 | timer_e get_type() const { return timerEnumValue; } |
| 515 | }; |
| 516 | |
| 517 | // Where you need to partition a threads clock ticks into separate states |
| 518 | // e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and |
| 519 | // DOING_NOTHING would render these conditions: |
| 520 | // time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive |
| 521 | // No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice |
| 522 | // versa |
| 523 | class partitionedTimers { |
| 524 | private: |
| 525 | std::vector<explicitTimer> timer_stack; |
| 526 | |
| 527 | public: |
| 528 | partitionedTimers(); |
| 529 | void init(explicitTimer timer); |
| 530 | void exchange(explicitTimer timer); |
| 531 | void push(explicitTimer timer); |
| 532 | void pop(); |
| 533 | void windup(); |
| 534 | }; |
| 535 | |
| 536 | // Special wrapper around the partitioned timers to aid timing code blocks |
| 537 | // It avoids the need to have an explicit end, leaving the scope suffices. |
| 538 | class blockPartitionedTimer { |
| 539 | partitionedTimers *part_timers; |
| 540 | |
| 541 | public: |
| 542 | blockPartitionedTimer(partitionedTimers *pt, explicitTimer timer) |
| 543 | : part_timers(pt) { |
| 544 | part_timers->push(timer); |
| 545 | } |
| 546 | ~blockPartitionedTimer() { part_timers->pop(); } |
| 547 | }; |
| 548 | |
| 549 | // Special wrapper around the thread state to aid in keeping state in code |
| 550 | // blocks It avoids the need to have an explicit end, leaving the scope |
| 551 | // suffices. |
| 552 | class blockThreadState { |
| 553 | stats_state_e *state_pointer; |
| 554 | stats_state_e old_state; |
| 555 | |
| 556 | public: |
| 557 | blockThreadState(stats_state_e *thread_state_pointer, stats_state_e new_state) |
| 558 | : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) { |
| 559 | *state_pointer = new_state; |
| 560 | } |
| 561 | ~blockThreadState() { *state_pointer = old_state; } |
| 562 | }; |
| 563 | |
| 564 | // If all you want is a count, then you can use this... |
| 565 | // The individual per-thread counts will be aggregated into a statistic at |
| 566 | // program exit. |
| 567 | class counter { |
| 568 | uint64_t value; |
| 569 | static const statInfo counterInfo[]; |
| 570 | |
| 571 | public: |
| 572 | counter() : value(0) {} |
| 573 | void increment() { value++; } |
| 574 | uint64_t getValue() const { return value; } |
| 575 | void reset() { value = 0; } |
| 576 | static const char *name(counter_e e) { return counterInfo[e].name; } |
| 577 | static bool masterOnly(counter_e e) { |
| 578 | return counterInfo[e].flags & stats_flags_e::onlyInMaster; |
| 579 | } |
| 580 | }; |
| 581 | |
| 582 | /* **************************************************************** |
| 583 | Class to implement an event |
| 584 | |
| 585 | There are four components to an event: start time, stop time |
| 586 | nest_level, and timer_name. |
| 587 | The start and stop time should be obvious (recorded in clock ticks). |
| 588 | The nest_level relates to the bar width in the timeline graph. |
| 589 | The timer_name is used to determine which timer event triggered this event. |
| 590 | |
| 591 | the interface to this class is through four read-only operations: |
| 592 | 1) getStart() -- returns the start time as 64 bit integer |
| 593 | 2) getStop() -- returns the stop time as 64 bit integer |
| 594 | 3) getNestLevel() -- returns the nest level of the event |
| 595 | 4) getTimerName() -- returns the timer name that triggered event |
| 596 | |
| 597 | *MORE ON NEST_LEVEL* |
| 598 | The nest level is used in the bar graph that represents the timeline. |
| 599 | Its main purpose is for showing how events are nested inside each other. |
| 600 | For example, say events, A, B, and C are recorded. If the timeline |
| 601 | looks like this: |
| 602 | |
| 603 | Begin -------------------------------------------------------------> Time |
| 604 | | | | | | | |
| 605 | A B C C B A |
| 606 | start start start end end end |
| 607 | |
| 608 | Then A, B, C will have a nest level of 1, 2, 3 respectively. |
| 609 | These values are then used to calculate the barwidth so you can |
| 610 | see that inside A, B has occurred, and inside B, C has occurred. |
| 611 | Currently, this is shown with A's bar width being larger than B's |
| 612 | bar width, and B's bar width being larger than C's bar width. |
| 613 | |
| 614 | **************************************************************** */ |
| 615 | class kmp_stats_event { |
| 616 | uint64_t start; |
| 617 | uint64_t stop; |
| 618 | int nest_level; |
| 619 | timer_e timer_name; |
| 620 | |
| 621 | public: |
| 622 | kmp_stats_event() |
| 623 | : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {} |
| 624 | kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme) |
| 625 | : start(strt), stop(stp), nest_level(nst), timer_name(nme) {} |
| 626 | inline uint64_t getStart() const { return start; } |
| 627 | inline uint64_t getStop() const { return stop; } |
| 628 | inline int getNestLevel() const { return nest_level; } |
| 629 | inline timer_e getTimerName() const { return timer_name; } |
| 630 | }; |
| 631 | |
| 632 | /* **************************************************************** |
| 633 | Class to implement a dynamically expandable array of events |
| 634 | |
| 635 | --------------------------------------------------------- |
| 636 | | event 1 | event 2 | event 3 | event 4 | ... | event N | |
| 637 | --------------------------------------------------------- |
| 638 | |
| 639 | An event is pushed onto the back of this array at every |
| 640 | explicitTimer->stop() call. The event records the thread #, |
| 641 | start time, stop time, and nest level related to the bar width. |
| 642 | |
| 643 | The event vector starts at size INIT_SIZE and grows (doubles in size) |
| 644 | if needed. An implication of this behavior is that log(N) |
| 645 | reallocations are needed (where N is number of events). If you want |
| 646 | to avoid reallocations, then set INIT_SIZE to a large value. |
| 647 | |
| 648 | the interface to this class is through six operations: |
| 649 | 1) reset() -- sets the internal_size back to 0 but does not deallocate any |
| 650 | memory |
| 651 | 2) size() -- returns the number of valid elements in the vector |
| 652 | 3) push_back(start, stop, nest, timer_name) -- pushes an event onto |
| 653 | the back of the array |
| 654 | 4) deallocate() -- frees all memory associated with the vector |
| 655 | 5) sort() -- sorts the vector by start time |
| 656 | 6) operator[index] or at(index) -- returns event reference at that index |
| 657 | **************************************************************** */ |
| 658 | class kmp_stats_event_vector { |
| 659 | kmp_stats_event *events; |
| 660 | int internal_size; |
| 661 | int allocated_size; |
| 662 | static const int INIT_SIZE = 1024; |
| 663 | |
| 664 | public: |
| 665 | kmp_stats_event_vector() { |
| 666 | events = |
| 667 | (kmp_stats_event *)__kmp_allocate(sizeof(kmp_stats_event) * INIT_SIZE); |
| 668 | internal_size = 0; |
| 669 | allocated_size = INIT_SIZE; |
| 670 | } |
| 671 | ~kmp_stats_event_vector() {} |
| 672 | inline void reset() { internal_size = 0; } |
| 673 | inline int size() const { return internal_size; } |
| 674 | void push_back(uint64_t start_time, uint64_t stop_time, int nest_level, |
| 675 | timer_e name) { |
| 676 | int i; |
| 677 | if (internal_size == allocated_size) { |
| 678 | kmp_stats_event *tmp = (kmp_stats_event *)__kmp_allocate( |
| 679 | sizeof(kmp_stats_event) * allocated_size * 2); |
| 680 | for (i = 0; i < internal_size; i++) |
| 681 | tmp[i] = events[i]; |
| 682 | __kmp_free(events); |
| 683 | events = tmp; |
| 684 | allocated_size *= 2; |
| 685 | } |
| 686 | events[internal_size] = |
| 687 | kmp_stats_event(start_time, stop_time, nest_level, name); |
| 688 | internal_size++; |
| 689 | return; |
| 690 | } |
| 691 | void deallocate(); |
| 692 | void sort(); |
| 693 | const kmp_stats_event &operator[](int index) const { return events[index]; } |
| 694 | kmp_stats_event &operator[](int index) { return events[index]; } |
| 695 | const kmp_stats_event &at(int index) const { return events[index]; } |
| 696 | kmp_stats_event &at(int index) { return events[index]; } |
| 697 | }; |
| 698 | |
| 699 | /* **************************************************************** |
| 700 | Class to implement a doubly-linked, circular, statistics list |
| 701 | |
| 702 | |---| ---> |---| ---> |---| ---> |---| ---> ... next |
| 703 | | | | | | | | | |
| 704 | |---| <--- |---| <--- |---| <--- |---| <--- ... prev |
| 705 | Sentinel first second third |
| 706 | Node node node node |
| 707 | |
| 708 | The Sentinel Node is the user handle on the list. |
| 709 | The first node corresponds to thread 0's statistics. |
| 710 | The second node corresponds to thread 1's statistics and so on... |
| 711 | |
| 712 | Each node has a _timers, _counters, and _explicitTimers array to hold that |
| 713 | thread's statistics. The _explicitTimers point to the correct _timer and |
| 714 | update its statistics at every stop() call. The explicitTimers' pointers are |
| 715 | set up in the constructor. Each node also has an event vector to hold that |
| 716 | thread's timing events. The event vector expands as necessary and records |
| 717 | the start-stop times for each timer. |
| 718 | |
| 719 | The nestLevel variable is for plotting events and is related |
| 720 | to the bar width in the timeline graph. |
| 721 | |
| 722 | Every thread will have a thread local pointer to its node in |
| 723 | the list. The sentinel node is used by the primary thread to |
| 724 | store "dummy" statistics before __kmp_create_worker() is called. |
| 725 | **************************************************************** */ |
| 726 | class kmp_stats_list { |
| 727 | int gtid; |
| 728 | timeStat _timers[TIMER_LAST + 1]; |
| 729 | counter _counters[COUNTER_LAST + 1]; |
| 730 | explicitTimer thread_life_timer; |
| 731 | partitionedTimers _partitionedTimers; |
| 732 | int _nestLevel; // one per thread |
| 733 | kmp_stats_event_vector _event_vector; |
| 734 | kmp_stats_list *next; |
| 735 | kmp_stats_list *prev; |
| 736 | stats_state_e state; |
| 737 | int thread_is_idle_flag; |
| 738 | |
| 739 | public: |
| 740 | kmp_stats_list() |
| 741 | : thread_life_timer(&_timers[TIMER_OMP_worker_thread_life], |
| 742 | TIMER_OMP_worker_thread_life), |
| 743 | _nestLevel(0), _event_vector(), next(this), prev(this), state(IDLE), |
| 744 | thread_is_idle_flag(0) {} |
| 745 | ~kmp_stats_list() {} |
| 746 | inline timeStat *getTimer(timer_e idx) { return &_timers[idx]; } |
| 747 | inline counter *getCounter(counter_e idx) { return &_counters[idx]; } |
| 748 | inline partitionedTimers *getPartitionedTimers() { |
| 749 | return &_partitionedTimers; |
| 750 | } |
| 751 | inline timeStat *getTimers() { return _timers; } |
| 752 | inline counter *getCounters() { return _counters; } |
| 753 | inline kmp_stats_event_vector &getEventVector() { return _event_vector; } |
| 754 | inline void startLife() { thread_life_timer.start(tsc_tick_count::now()); } |
| 755 | inline void endLife() { thread_life_timer.stop(tsc_tick_count::now(), this); } |
| 756 | inline void resetEventVector() { _event_vector.reset(); } |
| 757 | inline void incrementNestValue() { _nestLevel++; } |
| 758 | inline int getNestValue() { return _nestLevel; } |
| 759 | inline void decrementNestValue() { _nestLevel--; } |
| 760 | inline int getGtid() const { return gtid; } |
| 761 | inline void setGtid(int newgtid) { gtid = newgtid; } |
| 762 | inline void setState(stats_state_e newstate) { state = newstate; } |
| 763 | inline stats_state_e getState() const { return state; } |
| 764 | inline stats_state_e *getStatePointer() { return &state; } |
| 765 | inline bool isIdle() { return thread_is_idle_flag == 1; } |
| 766 | inline void setIdleFlag() { thread_is_idle_flag = 1; } |
| 767 | inline void resetIdleFlag() { thread_is_idle_flag = 0; } |
| 768 | kmp_stats_list *push_back(int gtid); // returns newly created list node |
| 769 | inline void push_event(uint64_t start_time, uint64_t stop_time, |
| 770 | int nest_level, timer_e name) { |
| 771 | _event_vector.push_back(start_time, stop_time, nest_level, name); |
| 772 | } |
| 773 | void deallocate(); |
| 774 | class iterator; |
| 775 | kmp_stats_list::iterator begin(); |
| 776 | kmp_stats_list::iterator end(); |
| 777 | int size(); |
| 778 | class iterator { |
| 779 | kmp_stats_list *ptr; |
| 780 | friend kmp_stats_list::iterator kmp_stats_list::begin(); |
| 781 | friend kmp_stats_list::iterator kmp_stats_list::end(); |
| 782 | |
| 783 | public: |
| 784 | iterator(); |
| 785 | ~iterator(); |
| 786 | iterator operator++(); |
| 787 | iterator operator++(int dummy); |
| 788 | iterator operator--(); |
| 789 | iterator operator--(int dummy); |
| 790 | bool operator!=(const iterator &rhs); |
| 791 | bool operator==(const iterator &rhs); |
| 792 | kmp_stats_list *operator*() const; // dereference operator |
| 793 | }; |
| 794 | }; |
| 795 | |
| 796 | /* **************************************************************** |
| 797 | Class to encapsulate all output functions and the environment variables |
| 798 | |
| 799 | This module holds filenames for various outputs (normal stats, events, plot |
| 800 | file), as well as coloring information for the plot file. |
| 801 | |
| 802 | The filenames and flags variables are read from environment variables. |
| 803 | These are read once by the constructor of the global variable |
| 804 | __kmp_stats_output which calls init(). |
| 805 | |
| 806 | During this init() call, event flags for the timeStat::timerInfo[] global |
| 807 | array are cleared if KMP_STATS_EVENTS is not true (on, 1, yes). |
| 808 | |
| 809 | The only interface function that is public is outputStats(heading). This |
| 810 | function should print out everything it needs to, either to files or stderr, |
| 811 | depending on the environment variables described below |
| 812 | |
| 813 | ENVIRONMENT VARIABLES: |
| 814 | KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this |
| 815 | file, otherwise, print to stderr |
| 816 | KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to |
| 817 | either KMP_STATS_FILE or stderr |
| 818 | KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename, |
| 819 | otherwise, the plot file is sent to "events.plt" |
| 820 | KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log |
| 821 | events |
| 822 | KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file, |
| 823 | otherwise, output is sent to "events.dat" |
| 824 | **************************************************************** */ |
| 825 | class kmp_stats_output_module { |
| 826 | |
| 827 | public: |
| 828 | struct rgb_color { |
| 829 | float r; |
| 830 | float g; |
| 831 | float b; |
| 832 | }; |
| 833 | |
| 834 | private: |
| 835 | std::string outputFileName; |
| 836 | static const char *eventsFileName; |
| 837 | static const char *plotFileName; |
| 838 | static int printPerThreadFlag; |
| 839 | static int printPerThreadEventsFlag; |
| 840 | static const rgb_color globalColorArray[]; |
| 841 | static rgb_color timerColorInfo[]; |
| 842 | |
| 843 | void init(); |
| 844 | static void setupEventColors(); |
| 845 | static void printPloticusFile(); |
| 846 | static void printHeaderInfo(FILE *statsOut); |
| 847 | static void printTimerStats(FILE *statsOut, statistic const *theStats, |
| 848 | statistic const *totalStats); |
| 849 | static void printCounterStats(FILE *statsOut, statistic const *theStats); |
| 850 | static void printCounters(FILE *statsOut, counter const *theCounters); |
| 851 | static void printEvents(FILE *eventsOut, kmp_stats_event_vector *theEvents, |
| 852 | int gtid); |
| 853 | static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; } |
| 854 | static void windupExplicitTimers(); |
| 855 | bool eventPrintingEnabled() const { return printPerThreadEventsFlag; } |
| 856 | |
| 857 | public: |
| 858 | kmp_stats_output_module() { init(); } |
| 859 | void outputStats(const char *heading); |
| 860 | }; |
| 861 | |
| 862 | #ifdef __cplusplus |
| 863 | extern "C" { |
| 864 | #endif |
| 865 | void __kmp_stats_init(); |
| 866 | void __kmp_stats_fini(); |
| 867 | void __kmp_reset_stats(); |
| 868 | void __kmp_output_stats(const char *); |
| 869 | void __kmp_accumulate_stats_at_exit(void); |
| 870 | // thread local pointer to stats node within list |
| 871 | extern KMP_THREAD_LOCAL kmp_stats_list *__kmp_stats_thread_ptr; |
| 872 | // head to stats list. |
| 873 | extern kmp_stats_list *__kmp_stats_list; |
| 874 | // lock for __kmp_stats_list |
| 875 | extern kmp_tas_lock_t __kmp_stats_lock; |
| 876 | // reference start time |
| 877 | extern tsc_tick_count __kmp_stats_start_time; |
| 878 | // interface to output |
| 879 | extern kmp_stats_output_module __kmp_stats_output; |
| 880 | |
| 881 | #ifdef __cplusplus |
| 882 | } |
| 883 | #endif |
| 884 | |
| 885 | // Simple, standard interfaces that drop out completely if stats aren't enabled |
| 886 | |
/*!
 * \brief Adds value to specified timer (name).
 *
 * @param name timer name as specified under the KMP_FOREACH_TIMER() macro
 * @param value sample value; it is converted to double and added to the
 * statistics for the timer
 *
 * \details Use KMP_COUNT_VALUE(name, value) macro to add a particular value to
 * a timer's statistics. The \p value argument is parenthesized before the
 * cast, so an expression argument such as `a / b + 1` is evaluated as written
 * by the caller and only its result is converted to double.
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_COUNT_VALUE(name, value)                                           \
  __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample((double)(value))
| 900 | |
/*!
 * \brief Increments specified counter (name).
 *
 * @param name counter name as specified under the KMP_FOREACH_COUNTER() macro
 *
 * \details Use KMP_COUNT_BLOCK(name) macro to increment a statistics
 * counter for the executing thread.
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_COUNT_BLOCK(name)                                                  \
  __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()
| 913 | |
/*!
 * \brief Outputs the current thread statistics and resets them.
 *
 * @param heading_string heading put above the final stats output
 *
 * \details Explicitly stops all timers and outputs all stats. Environment
 * variable, `KMP_STATS_FILE=filename`, can be used to output the stats to a
 * filename instead of stderr. Environment variable, `KMP_STATS_THREADS=on`,
 * can be used to output thread specific stats; when it is not set that way,
 * thread specific stats won't be printed. These are the variables listed in
 * the kmp_stats_output_module description in this file. It should be noted
 * that all statistics are reset when this macro is called.
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_OUTPUT_STATS(heading_string) __kmp_output_stats(heading_string)
| 932 | |
/*!
 * \brief Initializes the partitioned timers to begin with name.
 *
 * @param name timer which you want this thread to begin with
 *
 * \details Expands to a call of init() on the calling thread's
 * partitionedTimers object, passing an explicitTimer built from the thread's
 * timeStat for TIMER_name and the TIMER_name enumerator itself.
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_INIT_PARTITIONED_TIMERS(name)                                      \
  __kmp_stats_thread_ptr->getPartitionedTimers()->init(explicitTimer(          \
      __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))
| 943 | |
/*!
 * \brief Declares a block-scoped partitioned timer object for timer (name).
 *
 * @param name timer name; it is pasted onto the TIMER_ prefix
 *
 * \details Expands to the declaration of a local blockPartitionedTimer named
 * __PBLOCKTIME__, constructed from the calling thread's partitionedTimers
 * and an explicitTimer for TIMER_name. Because the variable name is fixed,
 * the macro can appear at most once in any given scope.
 * NOTE(review): presumably blockPartitionedTimer pushes the timer when it is
 * constructed and pops it when it goes out of scope -- confirm against its
 * definition.
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_TIME_PARTITIONED_BLOCK(name)                                       \
  blockPartitionedTimer __PBLOCKTIME__(                                        \
      __kmp_stats_thread_ptr->getPartitionedTimers(),                          \
      explicitTimer(__kmp_stats_thread_ptr->getTimer(TIMER_##name),            \
                    TIMER_##name))
| 949 | |
/*!
 * \brief Pushes timer (name) onto the calling thread's partitioned timers.
 *
 * @param name timer name; it is pasted onto the TIMER_ prefix
 *
 * \details Expands to a call of push() on the calling thread's
 * partitionedTimers object with an explicitTimer for TIMER_name.
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_PUSH_PARTITIONED_TIMER(name)                                       \
  __kmp_stats_thread_ptr->getPartitionedTimers()->push(explicitTimer(          \
      __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))
| 953 | |
/*!
 * \brief Pops the calling thread's partitioned timers.
 *
 * \details Takes no arguments; expands to a call of pop() on the calling
 * thread's partitionedTimers object.
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_POP_PARTITIONED_TIMER()                                            \
  __kmp_stats_thread_ptr->getPartitionedTimers()->pop()
| 956 | |
/*!
 * \brief Exchanges the calling thread's partitioned timer for timer (name).
 *
 * @param name timer name; it is pasted onto the TIMER_ prefix
 *
 * \details Expands to a call of exchange() on the calling thread's
 * partitionedTimers object with an explicitTimer for TIMER_name.
 * NOTE(review): presumably this replaces the currently running timer in one
 * step instead of a pop followed by a push -- confirm against
 * partitionedTimers::exchange().
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_EXCHANGE_PARTITIONED_TIMER(name)                                   \
  __kmp_stats_thread_ptr->getPartitionedTimers()->exchange(explicitTimer(      \
      __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))
| 960 | |
/*!
 * \brief Sets the calling thread's statistics state.
 *
 * @param state_name new state, passed unchanged to kmp_stats_list::setState()
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_SET_THREAD_STATE(state_name)                                       \
  __kmp_stats_thread_ptr->setState(state_name)
| 963 | |
/*!
 * \brief Yields the calling thread's statistics state.
 *
 * \details Expands to a call of kmp_stats_list::getState() on the calling
 * thread's stats node.
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_GET_THREAD_STATE() __kmp_stats_thread_ptr->getState()
| 965 | |
/*!
 * \brief Declares a block-scoped thread state object for state_name.
 *
 * @param state_name state handed to the blockThreadState constructor
 *
 * \details Expands to the declaration of a local blockThreadState named
 * __BTHREADSTATE__, constructed from a pointer to the calling thread's state
 * member and state_name. Because the variable name is fixed, the macro can
 * appear at most once in any given scope.
 * NOTE(review): presumably blockThreadState restores the previous state when
 * it goes out of scope -- confirm against its definition.
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_SET_THREAD_STATE_BLOCK(state_name)                                 \
  blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), \
                                    state_name)
| 969 | |
/*!
 * \brief Resets all stats (counters to 0, timers to 0 elapsed ticks).
 *
 * \details Reset all stats for all threads. Expands to a call of
 * __kmp_reset_stats().
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_RESET_STATS() __kmp_reset_stats()
| 978 | |
/* Developer variants of the statistics macros. With KMP_DEVELOPER_STATS
   enabled each one forwards to its regular counterpart; otherwise each one
   expands to a no-op expression. */
#if (KMP_DEVELOPER_STATS)
#define KMP_COUNT_DEVELOPER_VALUE(n, v) KMP_COUNT_VALUE(n, v)
#define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n)
#define KMP_PUSH_DEVELOPER_PARTITIONED_TIMER(n) KMP_PUSH_PARTITIONED_TIMER(n)
// KMP_POP_PARTITIONED_TIMER() takes no parameters, so `n` is accepted only to
// keep the same call shape as the null definition below and is deliberately
// not forwarded; forwarding a non-empty `n` would be a preprocessing error.
#define KMP_POP_DEVELOPER_PARTITIONED_TIMER(n) KMP_POP_PARTITIONED_TIMER()
#define KMP_EXCHANGE_DEVELOPER_PARTITIONED_TIMER(n)                            \
  KMP_EXCHANGE_PARTITIONED_TIMER(n)
#else
// Null definitions
#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#define KMP_PUSH_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_POP_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_EXCHANGE_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#endif
| 996 | |
| 997 | #else // KMP_STATS_ENABLED |
| 998 | |
// Null definitions: with statistics disabled every stats macro expands to a
// no-op expression, so call sites need no conditional compilation of their
// own.
#define KMP_COUNT_VALUE(n, v) ((void)0)
#define KMP_COUNT_BLOCK(n) ((void)0)

#define KMP_OUTPUT_STATS(heading_string) ((void)0)
#define KMP_RESET_STATS() ((void)0)

#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#define KMP_PUSH_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_POP_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_EXCHANGE_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0)
#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0)
#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0)
#define KMP_POP_PARTITIONED_TIMER() ((void)0)
// Counterpart of the stats-enabled definition, which otherwise had no null
// definition here.
#define KMP_EXCHANGE_PARTITIONED_TIMER(name) ((void)0)
#define KMP_SET_THREAD_STATE(state_name) ((void)0)
#define KMP_GET_THREAD_STATE() ((void)0)
#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0)
| 1019 | #endif // KMP_STATS_ENABLED |
| 1020 | |
| 1021 | #endif // KMP_STATS_H |
| 1022 | |