1// This file is part of OpenCV project.
2// It is subject to the license terms in the LICENSE file found in the top-level directory
3// of this distribution and at http://opencv.org/license.html.
4
5#include "precomp.hpp"
6
7#include <opencv2/core/utils/trace.hpp>
8#include <opencv2/core/utils/trace.private.hpp>
9#include <opencv2/core/utils/configuration.private.hpp>
10
11#include <opencv2/core/opencl/ocl_defs.hpp>
12
13#include <cstdarg> // va_start
14
15#include <sstream>
16#include <ostream>
17#include <fstream>
18
19#if 0
20#define CV_LOG(...) CV_LOG_INFO(NULL, __VA_ARGS__)
21#else
22#define CV_LOG(...) {}
23#endif
24
25#if 0
26#define CV_LOG_ITT(...) CV_LOG_INFO(NULL, __VA_ARGS__)
27#else
28#define CV_LOG_ITT(...) {}
29#endif
30
31#if 1
32#define CV_LOG_TRACE_BAILOUT(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
33#else
34#define CV_LOG_TRACE_BAILOUT(...) {}
35#endif
36
37#if 0
38#define CV_LOG_PARALLEL(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
39#else
40#define CV_LOG_PARALLEL(...) {}
41#endif
42
43#if 0
44#define CV_LOG_CTX_STAT(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
45#else
46#define CV_LOG_CTX_STAT(...) {}
47#endif
48
49#if 0
50#define CV_LOG_SKIP(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
51#else
52#define CV_LOG_SKIP(...) {}
53#endif
54
55namespace cv {
56namespace utils {
57namespace trace {
58namespace details {
59
60#ifdef OPENCV_TRACE
61
62#ifdef _MSC_VER
63#pragma warning(disable:4065) // switch statement contains 'default' but no 'case' labels
64#endif
65
66static bool getParameterTraceEnable()
67{
68 static bool param_traceEnable = utils::getConfigurationParameterBool(name: "OPENCV_TRACE", defaultValue: false);
69 return param_traceEnable;
70}
71
72// TODO lazy configuration flags
73static int param_maxRegionDepthOpenCV = (int)utils::getConfigurationParameterSizeT(name: "OPENCV_TRACE_DEPTH_OPENCV", defaultValue: 1);
74static int param_maxRegionChildrenOpenCV = (int)utils::getConfigurationParameterSizeT(name: "OPENCV_TRACE_MAX_CHILDREN_OPENCV", defaultValue: 1000);
75static int param_maxRegionChildren = (int)utils::getConfigurationParameterSizeT(name: "OPENCV_TRACE_MAX_CHILDREN", defaultValue: 10000);
76
77static const cv::String& getParameterTraceLocation()
78{
79 static cv::String param_traceLocation = utils::getConfigurationParameterString(name: "OPENCV_TRACE_LOCATION", defaultValue: "OpenCVTrace");
80 return param_traceLocation;
81}
82
83#ifdef HAVE_OPENCL
84static bool param_synchronizeOpenCL = utils::getConfigurationParameterBool(name: "OPENCV_TRACE_SYNC_OPENCL", defaultValue: false);
85#endif
86
87#ifdef OPENCV_WITH_ITT
88static bool param_ITT_registerParentScope = utils::getConfigurationParameterBool(name: "OPENCV_TRACE_ITT_PARENT", defaultValue: false);
89#endif
90
91static const char* _spaces(int count)
92{
93 static const char buf[64] =
94" ";
95 return &buf[63 - (count & 63)];
96}
97
98/**
99 * Text-based trace messages
100 */
101class TraceMessage
102{
103public:
104 char buffer[1024];
105 size_t len;
106 bool hasError;
107
108 TraceMessage() :
109 len(0),
110 hasError(false)
111 {}
112
113 bool printf(const char* format, ...)
114 {
115 char* buf = &buffer[len];
116 size_t sz = sizeof(buffer) - len;
117 va_list ap;
118 va_start(ap, format);
119 int n = cv_vsnprintf(buf, len: (int)sz, fmt: format, args: ap);
120 va_end(ap);
121 if (n < 0 || (size_t)n > sz)
122 {
123 hasError = true;
124 return false;
125 }
126 len += n;
127 return true;
128 }
129
130 bool formatlocation(const Region::LocationStaticStorage& location)
131 {
132 return this->printf(format: "l,%lld,\"%s\",%d,\"%s\",0x%llX\n",
133 (long long int)(*location.ppExtra)->global_location_id,
134 location.filename,
135 location.line,
136 location.name,
137 (long long int)(location.flags & ~0xF0000000));
138 }
139 bool formatRegionEnter(const Region& region)
140 {
141 bool ok = this->printf(format: "b,%d,%lld,%lld,%lld",
142 (int)region.pImpl->threadID,
143 (long long int)region.pImpl->beginTimestamp,
144 (long long int)((*region.pImpl->location.ppExtra)->global_location_id),
145 (long long int)region.pImpl->global_region_id);
146 if (region.pImpl->parentRegion && region.pImpl->parentRegion->pImpl)
147 {
148 if (region.pImpl->parentRegion->pImpl->threadID != region.pImpl->threadID)
149 ok &= this->printf(format: ",parentThread=%d,parent=%lld",
150 (int)region.pImpl->parentRegion->pImpl->threadID,
151 (long long int)region.pImpl->parentRegion->pImpl->global_region_id);
152 }
153 ok &= this->printf(format: "\n");
154 return ok;
155 }
156 bool formatRegionLeave(const Region& region, const RegionStatistics& result)
157 {
158 CV_DbgAssert(region.pImpl->endTimestamp - region.pImpl->beginTimestamp == result.duration);
159 bool ok = this->printf(format: "e,%d,%lld,%lld,%lld,%lld",
160 (int)region.pImpl->threadID,
161 (long long int)region.pImpl->endTimestamp,
162 (long long int)(*region.pImpl->location.ppExtra)->global_location_id,
163 (long long int)region.pImpl->global_region_id,
164 (long long int)result.duration);
165 if (result.currentSkippedRegions)
166 ok &= this->printf(format: ",skip=%d", (int)result.currentSkippedRegions);
167#ifdef HAVE_IPP
168 if (result.durationImplIPP)
169 ok &= this->printf(format: ",tIPP=%lld", (long long int)result.durationImplIPP);
170#endif
171#ifdef HAVE_OPENCL
172 if (result.durationImplOpenCL)
173 ok &= this->printf(format: ",tOCL=%lld", (long long int)result.durationImplOpenCL);
174#endif
175#ifdef HAVE_OPENVX
176 if (result.durationImplOpenVX)
177 ok &= this->printf(",tOVX=%lld", (long long int)result.durationImplOpenVX);
178#endif
179 ok &= this->printf(format: "\n");
180 return ok;
181 }
182 bool recordRegionArg(const Region& region, const TraceArg& arg, const char* value)
183 {
184 return this->printf(format: "a,%d,%lld,%lld,\"%s\",\"%s\"\n",
185 region.pImpl->threadID,
186 (long long int)region.pImpl->beginTimestamp,
187 (long long int)region.pImpl->global_region_id,
188 arg.name,
189 value);
190 }
191};
192
193
194#ifdef OPENCV_WITH_ITT
195static __itt_domain* domain = NULL;
196
197static bool isITTEnabled()
198{
199 static volatile bool isInitialized = false;
200 static bool isEnabled = false;
201 if (!isInitialized)
202 {
203 cv::AutoLock lock(cv::getInitializationMutex());
204 if (!isInitialized)
205 {
206 bool param_traceITTEnable = utils::getConfigurationParameterBool(name: "OPENCV_TRACE_ITT_ENABLE", defaultValue: true);
207 if (param_traceITTEnable)
208 {
209 isEnabled = !!(__itt_api_version());
210 CV_LOG_ITT("ITT is " << (isEnabled ? "enabled" : "disabled"));
211 domain = __itt_domain_create("OpenCVTrace");
212 }
213 else
214 {
215 CV_LOG_ITT("ITT is disabled through OpenCV parameter");
216 isEnabled = false;
217 }
218 isInitialized = true;
219 }
220 }
221 return isEnabled;
222}
223#endif
224
225
226Region::LocationExtraData::LocationExtraData(const LocationStaticStorage& location)
227{
228 CV_UNUSED(location);
229 static int g_location_id_counter = 0;
230 global_location_id = CV_XADD(&g_location_id_counter, 1) + 1;
231 CV_LOG("Register location: " << global_location_id << " (" << (void*)&location << ")"
232 << std::endl << " file: " << location.filename
233 << std::endl << " line: " << location.line
234 << std::endl << " name: " << location.name);
235#ifdef OPENCV_WITH_ITT
236 if (isITTEnabled())
237 {
238 // Caching is not required here, because there is builtin cache.
239 // https://software.intel.com/en-us/node/544203:
240 // Consecutive calls to __itt_string_handle_create with the same name return the same value.
241 ittHandle_name = __itt_string_handle_create(location.name);
242 ittHandle_filename = __itt_string_handle_create(location.filename);
243 }
244 else
245 {
246 ittHandle_name = 0;
247 ittHandle_filename = 0;
248 }
249#endif
250}
251
252/*static*/ Region::LocationExtraData* Region::LocationExtraData::init(const Region::LocationStaticStorage& location)
253{
254 LocationExtraData** pLocationExtra = location.ppExtra;
255 CV_DbgAssert(pLocationExtra);
256 if (*pLocationExtra == NULL)
257 {
258 cv::AutoLock lock(cv::getInitializationMutex());
259 if (*pLocationExtra == NULL)
260 {
261 *pLocationExtra = new Region::LocationExtraData(location);
262 TraceStorage* s = getTraceManager().trace_storage.get();
263 if (s)
264 {
265 TraceMessage msg;
266 msg.formatlocation(location);
267 s->put(msg);
268 }
269 }
270 }
271 return *pLocationExtra;
272}
273
274
275Region::Impl::Impl(TraceManagerThreadLocal& ctx, Region* parentRegion_, Region& region_, const LocationStaticStorage& location_, int64 beginTimestamp_) :
276 location(location_),
277 region(region_),
278 parentRegion(parentRegion_),
279 threadID(ctx.threadID),
280 global_region_id(++ctx.region_counter),
281 beginTimestamp(beginTimestamp_),
282 endTimestamp(0),
283 directChildrenCount(0)
284#ifdef OPENCV_WITH_ITT
285 ,itt_id_registered(false)
286 ,itt_id(__itt_null)
287#endif
288{
289 CV_DbgAssert(ctx.currentActiveRegion == parentRegion);
290 region.pImpl = this;
291
292 registerRegion(ctx);
293
294 enterRegion(ctx);
295}
296
297Region::Impl::~Impl()
298{
299#ifdef OPENCV_WITH_ITT
300 if (itt_id_registered)
301 {
302 CV_LOG_ITT(" Destroy ITT region: I=" << (void*)this);
303 __itt_id_destroy(domain, itt_id);
304 itt_id_registered = false;
305 }
306#endif
307 region.pImpl = NULL;
308}
309
310void Region::Impl::enterRegion(TraceManagerThreadLocal& ctx)
311{
312 ctx.currentActiveRegion = &region;
313
314 if (location.flags & REGION_FLAG_FUNCTION)
315 {
316 if ((location.flags & REGION_FLAG_APP_CODE) == 0)
317 {
318 ctx.regionDepthOpenCV++;
319 }
320 ctx.regionDepth++;
321 }
322
323 TraceStorage* s = ctx.getStorage();
324 if (s)
325 {
326 TraceMessage msg;
327 msg.formatRegionEnter(region);
328 s->put(msg);
329 }
330#ifdef OPENCV_WITH_ITT
331 if (isITTEnabled())
332 {
333 __itt_id parentID = __itt_null;
334 if (param_ITT_registerParentScope && parentRegion && parentRegion->pImpl && parentRegion->pImpl->itt_id_registered && (location.flags & REGION_FLAG_REGION_FORCE) == 0)
335 parentID = parentRegion->pImpl->itt_id;
336 __itt_task_begin(domain, itt_id, parentID, (*location.ppExtra)->ittHandle_name);
337 }
338#endif
339}
340
341void Region::Impl::leaveRegion(TraceManagerThreadLocal& ctx)
342{
343 int64 duration = endTimestamp - beginTimestamp; CV_UNUSED(duration);
344 RegionStatistics result;
345 ctx.stat.grab(result);
346 ctx.totalSkippedEvents += result.currentSkippedRegions;
347 CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "leaveRegion(): " << (void*)this << " " << result);
348#ifdef OPENCV_WITH_ITT
349 if (isITTEnabled())
350 {
351 if (result.currentSkippedRegions)
352 {
353 __itt_metadata_add(domain, itt_id, __itt_string_handle_create("skipped trace entries"), __itt_metadata_u32, 1, &result.currentSkippedRegions);
354 }
355#ifdef HAVE_IPP
356 if (result.durationImplIPP)
357 __itt_metadata_add(domain, itt_id, __itt_string_handle_create("tIPP"), __itt_metadata_u64, 1, &result.durationImplIPP);
358#endif
359#ifdef HAVE_OPENCL
360 if (result.durationImplOpenCL)
361 __itt_metadata_add(domain, itt_id, __itt_string_handle_create("tOpenCL"), __itt_metadata_u64, 1, &result.durationImplOpenCL);
362#endif
363#ifdef HAVE_OPENVX
364 if (result.durationImplOpenVX)
365 __itt_metadata_add(domain, itt_id, __itt_string_handle_create("tOpenVX"), __itt_metadata_u64, 1, &result.durationImplOpenVX);
366#endif
367 __itt_task_end(domain);
368 }
369#endif
370 TraceStorage* s = ctx.getStorage();
371 if (s)
372 {
373 TraceMessage msg;
374 msg.formatRegionLeave(region, result);
375 s->put(msg);
376 }
377
378 if (location.flags & REGION_FLAG_FUNCTION)
379 {
380 if ((location.flags & REGION_FLAG_APP_CODE) == 0)
381 {
382 ctx.regionDepthOpenCV--;
383 }
384 ctx.regionDepth--;
385 }
386
387 ctx.currentActiveRegion = parentRegion;
388}
389
390void Region::Impl::release()
391{
392 delete this;
393}
394
395void Region::Impl::registerRegion(TraceManagerThreadLocal& ctx)
396{
397#ifdef OPENCV_WITH_ITT
398 if (isITTEnabled())
399 {
400 if (!itt_id_registered)
401 {
402 CV_LOG_ITT(" Register ITT region: I=" << (void*)this << " " << ctx.threadID << "-" << global_region_id);
403#if 1 // workaround for some ITT backends
404 itt_id = __itt_id_make(addr: (void*)(intptr_t)(((int64)(ctx.threadID + 1) << 32) | global_region_id), extra: global_region_id);
405#else
406 itt_id = __itt_id_make((void*)(intptr_t)(ctx.threadID + 1), global_region_id);
407#endif
408 __itt_id_create(domain, itt_id);
409 itt_id_registered = true;
410 }
411 }
412#else
413 CV_UNUSED(ctx);
414#endif
415}
416
417void RegionStatisticsStatus::enableSkipMode(int depth)
418{
419 CV_DbgAssert(_skipDepth < 0);
420 CV_LOG_SKIP(NULL, "SKIP-ENABLE: depth=" << depth);
421 _skipDepth = depth;
422}
423void RegionStatisticsStatus::checkResetSkipMode(int leaveDepth)
424{
425 if (leaveDepth <= _skipDepth)
426 {
427 CV_LOG_SKIP(NULL, "SKIP-RESET: leaveDepth=" << leaveDepth << " skipDepth=" << _skipDepth);
428 _skipDepth = -1;
429 }
430}
431
432Region::Region(const LocationStaticStorage& location) :
433 pImpl(NULL),
434 implFlags(0)
435{
436 // Checks:
437 // - global enable flag
438 // - parent region is disabled
439 // - children count threshold
440 // - region location
441 // - depth (opencv nested calls)
442 if (!TraceManager::isActivated())
443 {
444 CV_LOG("Trace is disabled. Bailout");
445 return;
446 }
447
448 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
449 CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Region(): " << (void*)this << ": " << location.name);
450
451 Region* parentRegion = ctx.stackTopRegion();
452 const Region::LocationStaticStorage* parentLocation = ctx.stackTopLocation();
453
454 if (location.flags & REGION_FLAG_REGION_NEXT)
455 {
456 if (parentRegion && parentRegion->pImpl)
457 {
458 CV_DbgAssert((parentRegion->pImpl->location.flags & REGION_FLAG_FUNCTION) == 0);
459 parentRegion->destroy(); parentRegion->implFlags = 0;
460 parentRegion = ctx.stackTopRegion();
461 parentLocation = ctx.stackTopLocation();
462 }
463 }
464
465 int parentChildren = 0;
466 if (parentRegion && parentRegion->pImpl)
467 {
468 if (parentLocation == NULL)
469 {
470 // parallel_for_body code path
471 parentChildren = CV_XADD(&parentRegion->pImpl->directChildrenCount, 1) + 1;
472 }
473 else
474 {
475 parentChildren = ++parentRegion->pImpl->directChildrenCount;
476 }
477 }
478
479 int64 beginTimestamp = getTimestampNS();
480
481 int currentDepth = ctx.getCurrentDepth() + 1;
482 switch (location.flags & REGION_FLAG_IMPL_MASK)
483 {
484#ifdef HAVE_IPP
485 case REGION_FLAG_IMPL_IPP:
486 if (!ctx.stat_status.ignoreDepthImplIPP)
487 ctx.stat_status.ignoreDepthImplIPP = currentDepth;
488 break;
489#endif
490#ifdef HAVE_OPENCL
491 case REGION_FLAG_IMPL_OPENCL:
492 if (!ctx.stat_status.ignoreDepthImplOpenCL)
493 ctx.stat_status.ignoreDepthImplOpenCL = currentDepth;
494 break;
495#endif
496#ifdef HAVE_OPENVX
497 case REGION_FLAG_IMPL_OPENVX:
498 if (!ctx.stat_status.ignoreDepthImplOpenVX)
499 ctx.stat_status.ignoreDepthImplOpenVX = currentDepth;
500 break;
501#endif
502 default:
503 break;
504 }
505
506 ctx.stackPush(region: this, location: &location, beginTimestamp);
507 implFlags |= REGION_FLAG__NEED_STACK_POP;
508
509 if ((location.flags & REGION_FLAG_REGION_FORCE) == 0)
510 {
511 if (ctx.stat_status._skipDepth >= 0 && currentDepth > ctx.stat_status._skipDepth)
512 {
513 CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Parent region is disabled. Bailout");
514 ctx.stat.currentSkippedRegions++;
515 return;
516 }
517
518 if (param_maxRegionChildrenOpenCV > 0 && (location.flags & REGION_FLAG_APP_CODE) == 0 && parentLocation && (parentLocation->flags & REGION_FLAG_APP_CODE) == 0)
519 {
520 if (parentChildren >= param_maxRegionChildrenOpenCV)
521 {
522 CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "OpenCV parent region exceeds children count. Bailout");
523 ctx.stat_status.enableSkipMode(depth: currentDepth - 1);
524 ctx.stat.currentSkippedRegions++;
525 DEBUG_ONLY(ctx.dumpStack(std::cout, false));
526 return;
527 }
528 }
529 if (param_maxRegionChildren > 0 && parentChildren >= param_maxRegionChildren)
530 {
531 CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "Parent region exceeds children count. Bailout");
532 ctx.stat_status.enableSkipMode(depth: currentDepth - 1);
533 ctx.stat.currentSkippedRegions++;
534 DEBUG_ONLY(ctx.dumpStack(std::cout, false));
535 return;
536 }
537 }
538
539 LocationExtraData::init(location);
540
541 if ((*location.ppExtra)->global_location_id == 0)
542 {
543 CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "Region location is disabled. Bailout");
544 ctx.stat_status.enableSkipMode(depth: currentDepth);
545 ctx.stat.currentSkippedRegions++;
546 return;
547 }
548
549 if (parentLocation && (parentLocation->flags & REGION_FLAG_SKIP_NESTED))
550 {
551 CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Parent region disables inner regions. Bailout");
552 ctx.stat_status.enableSkipMode(depth: currentDepth);
553 ctx.stat.currentSkippedRegions++;
554 return;
555 }
556
557 if (param_maxRegionDepthOpenCV)
558 {
559 if ((location.flags & REGION_FLAG_APP_CODE) == 0)
560 {
561 if (ctx.regionDepthOpenCV >= param_maxRegionDepthOpenCV)
562 {
563 CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "OpenCV region depth is exceed = " << ctx.regionDepthOpenCV << ". Bailout");
564 if (ctx.stat.currentSkippedRegions == 0)
565 {
566 DEBUG_ONLY(ctx.dumpStack(std::cout, false));
567 }
568 ctx.stat_status.enableSkipMode(depth: currentDepth);
569 ctx.stat.currentSkippedRegions++;
570 return;
571 }
572 }
573 }
574
575 new Impl(ctx, parentRegion, *this, location, beginTimestamp);
576 CV_DbgAssert(pImpl != NULL);
577 implFlags |= REGION_FLAG__ACTIVE;
578
579 // parallel_for path
580 if (parentRegion && parentRegion->pImpl)
581 {
582 if (parentLocation == NULL)
583 {
584 pImpl->directChildrenCount = parentChildren;
585 }
586 }
587}
588
589void Region::destroy()
590{
591 CV_DbgAssert(implFlags != 0);
592
593 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
594 CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Region::destruct(): " << (void*)this << " pImpl=" << pImpl << " implFlags=" << implFlags << ' ' << (ctx.stackTopLocation() ? ctx.stackTopLocation()->name : "<unknown>"));
595
596 CV_DbgAssert(implFlags & REGION_FLAG__NEED_STACK_POP);
597 const int currentDepth = ctx.getCurrentDepth(); CV_UNUSED(currentDepth);
598
599 CV_LOG_CTX_STAT(NULL, _spaces(currentDepth*4) << ctx.stat << ' ' << ctx.stat_status);
600
601 const Region::LocationStaticStorage* location = ctx.stackTopLocation();
602 Impl::OptimizationPath myCodePath = Impl::CODE_PATH_PLAIN;
603 if (location)
604 {
605 switch (location->flags & REGION_FLAG_IMPL_MASK)
606 {
607#ifdef HAVE_IPP
608 case REGION_FLAG_IMPL_IPP:
609 myCodePath = Impl::CODE_PATH_IPP;
610 break;
611#endif
612#ifdef HAVE_OPENCL
613 case REGION_FLAG_IMPL_OPENCL:
614 if (param_synchronizeOpenCL && cv::ocl::isOpenCLActivated())
615 cv::ocl::finish();
616 myCodePath = Impl::CODE_PATH_OPENCL;
617 break;
618#endif
619#ifdef HAVE_OPENVX
620 case REGION_FLAG_IMPL_OPENVX:
621 myCodePath = Impl::CODE_PATH_OPENVX;
622 break;
623#endif
624 default:
625 break;
626 }
627 }
628
629 int64 endTimestamp = getTimestampNS();
630 int64 duration = endTimestamp - ctx.stackTopBeginTimestamp();
631
632 bool active = isActive();
633
634 if (active)
635 ctx.stat.duration = duration;
636 else if (ctx.stack.size() == ctx.parallel_for_stack_size + 1)
637 ctx.stat.duration += duration;
638
639 switch (myCodePath) {
640 case Impl::CODE_PATH_PLAIN:
641 // nothing
642 break;
643#ifdef HAVE_IPP
644 case Impl::CODE_PATH_IPP:
645 if (ctx.stat_status.ignoreDepthImplIPP == currentDepth)
646 {
647 ctx.stat.durationImplIPP += duration;
648 ctx.stat_status.ignoreDepthImplIPP = 0;
649 }
650 else if (active)
651 {
652 ctx.stat.durationImplIPP = duration;
653 }
654 break;
655#endif
656#ifdef HAVE_OPENCL
657 case Impl::CODE_PATH_OPENCL:
658 if (ctx.stat_status.ignoreDepthImplOpenCL == currentDepth)
659 {
660 ctx.stat.durationImplOpenCL += duration;
661 ctx.stat_status.ignoreDepthImplOpenCL = 0;
662 }
663 else if (active)
664 {
665 ctx.stat.durationImplOpenCL = duration;
666 }
667 break;
668#endif
669#ifdef HAVE_OPENVX
670 case Impl::CODE_PATH_OPENVX:
671 if (ctx.stat_status.ignoreDepthImplOpenVX == currentDepth)
672 {
673 ctx.stat.durationImplOpenVX += duration;
674 ctx.stat_status.ignoreDepthImplOpenVX = 0;
675 }
676 else if (active)
677 {
678 ctx.stat.durationImplOpenVX = duration;
679 }
680 break;
681#endif
682 default:
683 break;
684 }
685
686 if (pImpl)
687 {
688 CV_DbgAssert((implFlags & (REGION_FLAG__ACTIVE | REGION_FLAG__NEED_STACK_POP)) == (REGION_FLAG__ACTIVE | REGION_FLAG__NEED_STACK_POP));
689 CV_DbgAssert(ctx.stackTopRegion() == this);
690 pImpl->endTimestamp = endTimestamp;
691 pImpl->leaveRegion(ctx);
692 pImpl->release();
693 pImpl = NULL;
694 DEBUG_ONLY(implFlags &= ~REGION_FLAG__ACTIVE);
695 }
696 else
697 {
698 CV_DbgAssert(ctx.stat_status._skipDepth <= currentDepth);
699 }
700
701 if (implFlags & REGION_FLAG__NEED_STACK_POP)
702 {
703 CV_DbgAssert(ctx.stackTopRegion() == this);
704 ctx.stackPop();
705 ctx.stat_status.checkResetSkipMode(leaveDepth: currentDepth);
706 DEBUG_ONLY(implFlags &= ~REGION_FLAG__NEED_STACK_POP);
707 }
708 CV_LOG_CTX_STAT(NULL, _spaces(currentDepth*4) << "===> " << ctx.stat << ' ' << ctx.stat_status);
709}
710
711
712TraceManagerThreadLocal::~TraceManagerThreadLocal()
713{
714}
715
716void TraceManagerThreadLocal::dumpStack(std::ostream& out, bool onlyFunctions) const
717{
718 std::stringstream ss;
719 std::deque<StackEntry>::const_iterator it = stack.begin();
720 std::deque<StackEntry>::const_iterator end = stack.end();
721 int depth = 0;
722 for (; it != end; ++it)
723 {
724 const Region::LocationStaticStorage* location = it->location;
725 if (location)
726 {
727 if (!onlyFunctions || (location->flags & REGION_FLAG_FUNCTION))
728 {
729 ss << _spaces(count: 4*depth) << location->name << std::endl;
730 depth++;
731 }
732 }
733 else
734 {
735 ss << _spaces(count: 4*depth) << "<unknown>" << std::endl;
736 depth++;
737 }
738 }
739 out << ss.str();
740}
741
742class AsyncTraceStorage CV_FINAL : public TraceStorage
743{
744 mutable std::ofstream out;
745public:
746 const std::string name;
747
748 AsyncTraceStorage(const std::string& filename) :
749 out(filename.c_str(), std::ios::trunc),
750 name(filename)
751 {
752 out << "#description: OpenCV trace file" << std::endl;
753 out << "#version: 1.0" << std::endl;
754 }
755 ~AsyncTraceStorage()
756 {
757 out.close();
758 }
759
760 bool put(const TraceMessage& msg) const CV_OVERRIDE
761 {
762 if (msg.hasError)
763 return false;
764 out << msg.buffer;
765 //DEBUG_ONLY(std::flush(out)); // TODO configure flag
766 return true;
767 }
768};
769
770class SyncTraceStorage CV_FINAL : public TraceStorage
771{
772 mutable std::ofstream out;
773 mutable cv::Mutex mutex;
774public:
775 const std::string name;
776
777 SyncTraceStorage(const std::string& filename) :
778 out(filename.c_str(), std::ios::trunc),
779 name(filename)
780 {
781 out << "#description: OpenCV trace file" << std::endl;
782 out << "#version: 1.0" << std::endl;
783 }
784 ~SyncTraceStorage()
785 {
786 cv::AutoLock l(mutex);
787 out.close();
788 }
789
790 bool put(const TraceMessage& msg) const CV_OVERRIDE
791 {
792 if (msg.hasError)
793 return false;
794 {
795 cv::AutoLock l(mutex);
796 out << msg.buffer;
797 std::flush(os&: out); // TODO configure flag
798 }
799 return true;
800 }
801};
802
803
804TraceStorage* TraceManagerThreadLocal::getStorage() const
805{
806 // TODO configuration option for stdout/single trace file
807 if (storage.empty())
808 {
809 TraceStorage* global = getTraceManager().trace_storage.get();
810 if (global)
811 {
812 const std::string filepath = cv::format(fmt: "%s-%03d.txt", getParameterTraceLocation().c_str(), threadID).c_str();
813 TraceMessage msg;
814 const char* pos = strrchr(s: filepath.c_str(), c: '/'); // extract filename
815#ifdef _WIN32
816 if (!pos)
817 pos = strrchr(filepath.c_str(), '\\');
818#endif
819 if (!pos)
820 pos = filepath.c_str();
821 else
822 pos += 1; // fix to skip extra slash in filename beginning
823 msg.printf(format: "#thread file: %s\n", pos);
824 global->put(msg);
825 storage.reset(ptr: new AsyncTraceStorage(filepath));
826 }
827 }
828 return storage.get();
829}
830
831
832
833static bool activated = false;
834static bool isInitialized = false;
835
836TraceManager::TraceManager()
837{
838 (void)cv::getTimestampNS();
839
840 isInitialized = true;
841 CV_LOG("TraceManager ctor: " << (void*)this);
842
843 CV_LOG("TraceManager configure()");
844 activated = getParameterTraceEnable();
845
846 if (activated)
847 trace_storage.reset(ptr: new SyncTraceStorage(std::string(getParameterTraceLocation()) + ".txt"));
848
849#ifdef OPENCV_WITH_ITT
850 if (isITTEnabled())
851 {
852 activated = true; // force trace pipeline activation (without OpenCV storage)
853 __itt_region_begin(domain, __itt_null, __itt_null, __itt_string_handle_create("OpenCVTrace"));
854 }
855#endif
856}
857TraceManager::~TraceManager()
858{
859 CV_LOG("TraceManager dtor: " << (void*)this);
860
861#ifdef OPENCV_WITH_ITT
862 if (isITTEnabled())
863 {
864 __itt_region_end(domain, __itt_null);
865 }
866#endif
867
868 std::vector<TraceManagerThreadLocal*> threads_ctx;
869 tls.gather(data&: threads_ctx);
870 size_t totalEvents = 0, totalSkippedEvents = 0;
871 for (size_t i = 0; i < threads_ctx.size(); i++)
872 {
873 TraceManagerThreadLocal* ctx = threads_ctx[i];
874 if (ctx)
875 {
876 totalEvents += ctx->region_counter;
877 totalSkippedEvents += ctx->totalSkippedEvents;
878 }
879 }
880 if (totalEvents || activated)
881 {
882 CV_LOG_INFO(NULL, "Trace: Total events: " << totalEvents);
883 }
884 if (totalSkippedEvents)
885 {
886 CV_LOG_WARNING(NULL, "Trace: Total skipped events: " << totalSkippedEvents);
887 }
888
889 // This is a global static object, so process starts shutdown here
890 // Turn off trace
891 cv::__termination = true; // also set in DllMain() notifications handler for DLL_PROCESS_DETACH
892 activated = false;
893}
894
895bool TraceManager::isActivated()
896{
897 // Check if process starts shutdown, and set earlyExit to true
898 // to prevent further instrumentation processing earlier.
899 if (cv::__termination)
900 {
901 activated = false;
902 return false;
903 }
904
905 if (!isInitialized)
906 {
907 TraceManager& m = getTraceManager();
908 CV_UNUSED(m); // TODO
909 }
910
911 return activated;
912}
913
914
915static TraceManager* getTraceManagerCallOnce()
916{
917 static TraceManager globalInstance;
918 return &globalInstance;
919}
920TraceManager& getTraceManager()
921{
922 CV_SINGLETON_LAZY_INIT_REF(TraceManager, getTraceManagerCallOnce())
923}
924
925void parallelForSetRootRegion(const Region& rootRegion, const TraceManagerThreadLocal& root_ctx)
926{
927 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
928
929 if (ctx.dummy_stack_top.region == &rootRegion) // already attached
930 return;
931
932 CV_Assert(ctx.dummy_stack_top.region == NULL);
933 ctx.dummy_stack_top = TraceManagerThreadLocal::StackEntry(const_cast<Region*>(&rootRegion), NULL, -1);
934
935 if (&ctx == &root_ctx)
936 {
937 ctx.stat.grab(result&: ctx.parallel_for_stat);
938 ctx.parallel_for_stat_status = ctx.stat_status;
939 ctx.parallel_for_stack_size = ctx.stack.size();
940 return;
941 }
942
943 CV_Assert(ctx.stack.empty());
944
945 ctx.currentActiveRegion = const_cast<Region*>(&rootRegion);
946
947 ctx.regionDepth = root_ctx.regionDepth;
948 ctx.regionDepthOpenCV = root_ctx.regionDepthOpenCV;
949
950 ctx.parallel_for_stack_size = 0;
951
952 ctx.stat_status.propagateFrom(src: root_ctx.stat_status);
953}
954
955void parallelForAttachNestedRegion(const Region& rootRegion)
956{
957 CV_UNUSED(rootRegion);
958 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
959
960 CV_DbgAssert(ctx.dummy_stack_top.region == &rootRegion);
961
962 Region* region = ctx.getCurrentActiveRegion();
963 CV_LOG_PARALLEL(NULL, " PARALLEL_FOR: " << (void*)region << " ==> " << &rootRegion);
964 if (!region)
965 return;
966
967#ifdef OPENCV_WITH_ITT
968 if (!rootRegion.pImpl || !rootRegion.pImpl->itt_id_registered)
969 return;
970
971 if (!region->pImpl)
972 return;
973
974 CV_LOG_PARALLEL(NULL, " PARALLEL_FOR ITT: " << (void*)rootRegion.pImpl->itt_id.d1 << ":" << rootRegion.pImpl->itt_id.d2 << ":" << (void*)rootRegion.pImpl->itt_id.d3 << " => "
975 << (void*)region->pImpl->itt_id.d1 << ":" << region->pImpl->itt_id.d2 << ":" << (void*)region->pImpl->itt_id.d3);
976 __itt_relation_add(domain, region->pImpl->itt_id, __itt_relation_is_child_of, rootRegion.pImpl->itt_id);
977#endif
978}
979
980void parallelForFinalize(const Region& rootRegion)
981{
982 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
983
984 int64 endTimestamp = getTimestampNS();
985 int64 duration = endTimestamp - ctx.stackTopBeginTimestamp();
986 CV_LOG_PARALLEL(NULL, "parallel_for duration: " << duration << " " << &rootRegion);
987
988 std::vector<TraceManagerThreadLocal*> threads_ctx;
989 getTraceManager().tls.gather(data&: threads_ctx);
990 RegionStatistics parallel_for_stat;
991 for (size_t i = 0; i < threads_ctx.size(); i++)
992 {
993 TraceManagerThreadLocal* child_ctx = threads_ctx[i];
994
995 if (child_ctx && child_ctx->stackTopRegion() == &rootRegion)
996 {
997 CV_LOG_PARALLEL(NULL, "Thread=" << child_ctx->threadID << " " << child_ctx->stat);
998 RegionStatistics child_stat;
999 child_ctx->stat.grab(result&: child_stat);
1000 parallel_for_stat.append(stat&: child_stat);
1001 if (child_ctx != &ctx)
1002 {
1003 child_ctx->dummy_stack_top = TraceManagerThreadLocal::StackEntry();
1004 }
1005 else
1006 {
1007 ctx.parallel_for_stat.grab(result&: ctx.stat);
1008 ctx.stat_status = ctx.parallel_for_stat_status;
1009 child_ctx->dummy_stack_top = TraceManagerThreadLocal::StackEntry();
1010 }
1011 }
1012 }
1013
1014 float parallel_coeff = std::min(a: 1.0f, b: duration / (float)(parallel_for_stat.duration));
1015 CV_LOG_PARALLEL(NULL, "parallel_coeff=" << 1.0f / parallel_coeff);
1016 CV_LOG_PARALLEL(NULL, parallel_for_stat);
1017 if (parallel_coeff != 1.0f)
1018 {
1019 parallel_for_stat.multiply(c: parallel_coeff);
1020 CV_LOG_PARALLEL(NULL, parallel_for_stat);
1021 }
1022 parallel_for_stat.duration = 0;
1023 ctx.stat.append(stat&: parallel_for_stat);
1024 CV_LOG_PARALLEL(NULL, ctx.stat);
1025}
1026
1027struct TraceArg::ExtraData
1028{
1029#ifdef OPENCV_WITH_ITT
1030 // Special fields for ITT
1031 __itt_string_handle* volatile ittHandle_name;
1032#endif
1033 ExtraData(TraceManagerThreadLocal& ctx, const TraceArg& arg)
1034 {
1035 CV_UNUSED(ctx); CV_UNUSED(arg);
1036#ifdef OPENCV_WITH_ITT
1037 if (isITTEnabled())
1038 {
1039 // Caching is not required here, because there is builtin cache.
1040 // https://software.intel.com/en-us/node/544203:
1041 // Consecutive calls to __itt_string_handle_create with the same name return the same value.
1042 ittHandle_name = __itt_string_handle_create(arg.name);
1043 }
1044 else
1045 {
1046 ittHandle_name = 0;
1047 }
1048#endif
1049 }
1050};
1051
1052static void initTraceArg(TraceManagerThreadLocal& ctx, const TraceArg& arg)
1053{
1054 TraceArg::ExtraData** pExtra = arg.ppExtra;
1055 if (*pExtra == NULL)
1056 {
1057 cv::AutoLock lock(cv::getInitializationMutex());
1058 if (*pExtra == NULL)
1059 {
1060 *pExtra = new TraceArg::ExtraData(ctx, arg);
1061 }
1062 }
1063}
1064void traceArg(const TraceArg& arg, const char* value)
1065{
1066 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
1067 Region* region = ctx.getCurrentActiveRegion();
1068 if (!region)
1069 return;
1070 CV_Assert(region->pImpl);
1071 initTraceArg(ctx, arg);
1072 if (!value)
1073 value = "<null>";
1074#ifdef OPENCV_WITH_ITT
1075 if (isITTEnabled())
1076 {
1077 __itt_metadata_str_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, value, strlen(value));
1078 }
1079#endif
1080}
1081void traceArg(const TraceArg& arg, int value)
1082{
1083 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
1084 Region* region = ctx.getCurrentActiveRegion();
1085 if (!region)
1086 return;
1087 CV_Assert(region->pImpl);
1088 initTraceArg(ctx, arg);
1089#ifdef OPENCV_WITH_ITT
1090 if (isITTEnabled())
1091 {
1092 __itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, sizeof(int) == 4 ? __itt_metadata_s32 : __itt_metadata_s64, 1, &value);
1093 }
1094#else
1095 CV_UNUSED(value);
1096#endif
1097}
1098void traceArg(const TraceArg& arg, int64 value)
1099{
1100 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
1101 Region* region = ctx.getCurrentActiveRegion();
1102 if (!region)
1103 return;
1104 CV_Assert(region->pImpl);
1105 initTraceArg(ctx, arg);
1106#ifdef OPENCV_WITH_ITT
1107 if (isITTEnabled())
1108 {
1109 __itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, __itt_metadata_s64, 1, &value);
1110 }
1111#else
1112 CV_UNUSED(value);
1113#endif
1114}
1115void traceArg(const TraceArg& arg, double value)
1116{
1117 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
1118 Region* region = ctx.getCurrentActiveRegion();
1119 if (!region)
1120 return;
1121 CV_Assert(region->pImpl);
1122 initTraceArg(ctx, arg);
1123#ifdef OPENCV_WITH_ITT
1124 if (isITTEnabled())
1125 {
1126 __itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, __itt_metadata_double, 1, &value);
1127 }
1128#else
1129 CV_UNUSED(value);
1130#endif
1131}
1132
1133#else
1134
1135Region::Region(const LocationStaticStorage&) : pImpl(NULL), implFlags(0) {}
1136void Region::destroy() {}
1137
1138void traceArg(const TraceArg&, const char*) {}
1139void traceArg(const TraceArg&, int) {};
1140void traceArg(const TraceArg&, int64) {};
1141void traceArg(const TraceArg&, double) {};
1142
1143#endif
1144
1145}}}} // namespace
1146

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of opencv/modules/core/src/trace.cpp