| 1 | // This file is part of OpenCV project. |
| 2 | // It is subject to the license terms in the LICENSE file found in the top-level directory |
| 3 | // of this distribution and at http://opencv.org/license.html. |
| 4 | |
| 5 | #ifndef OPENCV_TRACE_PRIVATE_HPP |
| 6 | #define OPENCV_TRACE_PRIVATE_HPP |
| 7 | |
| 8 | #ifdef OPENCV_TRACE |
| 9 | |
| 10 | #include <opencv2/core/utils/logger.hpp> |
| 11 | |
| 12 | #include <opencv2/core/utils/tls.hpp> |
| 13 | |
| 14 | #include "trace.hpp" |
| 15 | |
| 16 | //! @cond IGNORED |
| 17 | |
| 18 | #include <deque> |
| 19 | #include <ostream> |
| 20 | |
| 21 | #define INTEL_ITTNOTIFY_API_PRIVATE 1 |
| 22 | #ifdef OPENCV_WITH_ITT |
| 23 | #include "ittnotify.h" |
| 24 | #endif |
| 25 | |
// DEBUG_ONLY(...): expands to its arguments when _DEBUG is defined and to the
// no-op expression `(void)0` otherwise, so it can be used where a statement
// or expression is expected. An existing definition of DEBUG_ONLY is kept as is.
#ifndef DEBUG_ONLY
#ifdef _DEBUG
#define DEBUG_ONLY(...) __VA_ARGS__
#else
#define DEBUG_ONLY(...) (void)0
#endif
#endif
| 33 | |
// DEBUG_ONLY_(...): expands to its arguments when _DEBUG is defined and to
// nothing at all otherwise (unlike DEBUG_ONLY, which leaves `(void)0` behind).
// An existing definition of DEBUG_ONLY_ is kept as is.
#ifndef DEBUG_ONLY_
#ifdef _DEBUG
#define DEBUG_ONLY_(...) __VA_ARGS__
#else
#define DEBUG_ONLY_(...)
#endif
#endif
| 41 | |
| 42 | |
| 43 | namespace cv { |
| 44 | namespace utils { |
| 45 | namespace trace { |
| 46 | namespace details { |
| 47 | |
// Expands to two statements: a trace location definition produced by
// CV__TRACE_DEFINE_LOCATION_FN(name, flags), followed by a const Region object
// named `__region_fn` constructed from that location.
// NOTE(review): the expansion already ends with ';' and introduces a fixed
// variable name, so presumably it is meant to be used once per scope - confirm.
#define CV__TRACE_OPENCV_FUNCTION_NAME_(name, flags) \
    CV__TRACE_DEFINE_LOCATION_FN(name, flags); \
    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
| 51 | |
| 52 | |
//! Bit flags for a trace region; each regular flag occupies its own bit.
enum RegionFlag
{
    REGION_FLAG__NEED_STACK_POP = 1 << 0,  // bit 0
    REGION_FLAG__ACTIVE         = 1 << 1,  // bit 1

    // Sentinel equal to INT_MAX. NOTE(review): judging by its name it exists to
    // force an int-sized representation of the enum - confirm.
    ENUM_REGION_FLAG_IMPL_FORCE_INT = INT_MAX
};
| 59 | |
| 60 | |
class TraceMessage;  // only used by reference below, so a forward declaration is enough

//! Abstract sink for trace messages; concrete storages implement put().
class TraceStorage
{
public:
    TraceStorage()
    {
    }

    //! Virtual so that implementations can be destroyed through a TraceStorage pointer.
    virtual ~TraceStorage()
    {
    }

    //! Hands one message to the storage.
    //! @param msg message to store
    //! @return status flag reported by the implementation (meaning is defined by the implementation)
    virtual bool put(const TraceMessage& msg) const = 0;
};
| 70 | |
| 71 | struct RegionStatistics |
| 72 | { |
| 73 | int currentSkippedRegions; |
| 74 | |
| 75 | int64 duration; |
| 76 | #ifdef HAVE_IPP |
| 77 | int64 durationImplIPP; |
| 78 | #endif |
| 79 | #ifdef HAVE_OPENCL |
| 80 | int64 durationImplOpenCL; |
| 81 | #endif |
| 82 | #ifdef HAVE_OPENVX |
| 83 | int64 durationImplOpenVX; |
| 84 | #endif |
| 85 | |
| 86 | RegionStatistics() : |
| 87 | currentSkippedRegions(0), |
| 88 | duration(0) |
| 89 | #ifdef HAVE_IPP |
| 90 | ,durationImplIPP(0) |
| 91 | #endif |
| 92 | #ifdef HAVE_OPENCL |
| 93 | ,durationImplOpenCL(0) |
| 94 | #endif |
| 95 | #ifdef HAVE_OPENVX |
| 96 | ,durationImplOpenVX(0) |
| 97 | #endif |
| 98 | {} |
| 99 | |
| 100 | void grab(RegionStatistics& result) |
| 101 | { |
| 102 | result.currentSkippedRegions = currentSkippedRegions; currentSkippedRegions = 0; |
| 103 | result.duration = duration; duration = 0; |
| 104 | #ifdef HAVE_IPP |
| 105 | result.durationImplIPP = durationImplIPP; durationImplIPP = 0; |
| 106 | #endif |
| 107 | #ifdef HAVE_OPENCL |
| 108 | result.durationImplOpenCL = durationImplOpenCL; durationImplOpenCL = 0; |
| 109 | #endif |
| 110 | #ifdef HAVE_OPENVX |
| 111 | result.durationImplOpenVX = durationImplOpenVX; durationImplOpenVX = 0; |
| 112 | #endif |
| 113 | } |
| 114 | |
| 115 | void append(RegionStatistics& stat) |
| 116 | { |
| 117 | currentSkippedRegions += stat.currentSkippedRegions; |
| 118 | duration += stat.duration; |
| 119 | #ifdef HAVE_IPP |
| 120 | durationImplIPP += stat.durationImplIPP; |
| 121 | #endif |
| 122 | #ifdef HAVE_OPENCL |
| 123 | durationImplOpenCL += stat.durationImplOpenCL; |
| 124 | #endif |
| 125 | #ifdef HAVE_OPENVX |
| 126 | durationImplOpenVX += stat.durationImplOpenVX; |
| 127 | #endif |
| 128 | } |
| 129 | |
| 130 | void multiply(const float c) |
| 131 | { |
| 132 | duration = (int64)(duration * c); |
| 133 | #ifdef HAVE_IPP |
| 134 | durationImplIPP = (int64)(durationImplIPP * c); |
| 135 | #endif |
| 136 | #ifdef HAVE_OPENCL |
| 137 | durationImplOpenCL = (int64)(durationImplOpenCL * c); |
| 138 | #endif |
| 139 | #ifdef HAVE_OPENVX |
| 140 | durationImplOpenVX = (int64)(durationImplOpenVX * c); |
| 141 | #endif |
| 142 | } |
| 143 | }; |
| 144 | |
| 145 | static inline |
| 146 | std::ostream& operator<<(std::ostream& out, const RegionStatistics& stat) |
| 147 | { |
| 148 | out << "skip=" << stat.currentSkippedRegions |
| 149 | << " duration=" << stat.duration |
| 150 | #ifdef HAVE_IPP |
| 151 | << " durationImplIPP=" << stat.durationImplIPP |
| 152 | #endif |
| 153 | #ifdef HAVE_OPENCL |
| 154 | << " durationImplOpenCL=" << stat.durationImplOpenCL |
| 155 | #endif |
| 156 | #ifdef HAVE_OPENVX |
| 157 | << " durationImplOpenVX=" << stat.durationImplOpenVX |
| 158 | #endif |
| 159 | ; |
| 160 | return out; |
| 161 | } |
| 162 | |
//! Bookkeeping that accompanies RegionStatistics: the skip-mode depth and,
//! per compiled-in backend (IPP / OpenCL / OpenVX), an "ignore" depth counter.
struct RegionStatisticsStatus
{
    int _skipDepth;  // negative: skip mode is off (see the `>= 0` checks below)
#ifdef HAVE_IPP
    int ignoreDepthImplIPP;
#endif
#ifdef HAVE_OPENCL
    int ignoreDepthImplOpenCL;
#endif
#ifdef HAVE_OPENVX
    int ignoreDepthImplOpenVX;
#endif

    RegionStatisticsStatus() { reset(); }

    //! Returns to the initial state: skip mode off (-1), all ignore depths zero.
    void reset()
    {
        _skipDepth = -1;
#ifdef HAVE_IPP
        ignoreDepthImplIPP = 0;
#endif
#ifdef HAVE_OPENCL
        ignoreDepthImplOpenCL = 0;
#endif
#ifdef HAVE_OPENVX
        ignoreDepthImplOpenVX = 0;
#endif
    }

    //! Initializes this status from @p src, keeping only the on/off information:
    //! skip mode is re-enabled at depth 0 if it was active in @p src, and each
    //! ignore depth is collapsed to 1 (non-zero in @p src) or 0.
    void propagateFrom(const RegionStatisticsStatus& src)
    {
        _skipDepth = -1;
        if (src._skipDepth >= 0)
            enableSkipMode(0);  // plain positional argument; C++ has no `name: value` call syntax
#ifdef HAVE_IPP
        ignoreDepthImplIPP = src.ignoreDepthImplIPP ? 1 : 0;
#endif
#ifdef HAVE_OPENCL
        ignoreDepthImplOpenCL = src.ignoreDepthImplOpenCL ? 1 : 0;
#endif
#ifdef HAVE_OPENVX
        ignoreDepthImplOpenVX = src.ignoreDepthImplOpenVX ? 1 : 0;
#endif
    }

    // Declared here, defined in the implementation file.
    void enableSkipMode(int depth);
    void checkResetSkipMode(int leaveDepth);
};
| 211 | |
| 212 | static inline |
| 213 | std::ostream& operator<<(std::ostream& out, const RegionStatisticsStatus& s) |
| 214 | { |
| 215 | out << "ignore={" ; |
| 216 | if (s._skipDepth >= 0) |
| 217 | out << " SKIP=" << s._skipDepth; |
| 218 | #ifdef HAVE_IPP |
| 219 | if (s.ignoreDepthImplIPP) |
| 220 | out << " IPP=" << s.ignoreDepthImplIPP; |
| 221 | #endif |
| 222 | #ifdef HAVE_OPENCL |
| 223 | if (s.ignoreDepthImplOpenCL) |
| 224 | out << " OpenCL=" << s.ignoreDepthImplOpenCL; |
| 225 | #endif |
| 226 | #ifdef HAVE_OPENVX |
| 227 | if (s.ignoreDepthImplOpenVX) |
| 228 | out << " OpenVX=" << s.ignoreDepthImplOpenVX; |
| 229 | #endif |
| 230 | out << "}" ; |
| 231 | return out; |
| 232 | } |
| 233 | |
| 234 | //! TraceManager for local thread |
| 235 | struct TraceManagerThreadLocal |
| 236 | { |
| 237 | const int threadID; |
| 238 | int region_counter; |
| 239 | |
| 240 | size_t totalSkippedEvents; |
| 241 | |
| 242 | Region* currentActiveRegion; |
| 243 | |
| 244 | struct StackEntry |
| 245 | { |
| 246 | Region* region; |
| 247 | const Region::LocationStaticStorage* location; |
| 248 | int64 beginTimestamp; |
| 249 | StackEntry(Region* region_, const Region::LocationStaticStorage* location_, int64 beginTimestamp_) : |
| 250 | region(region_), location(location_), beginTimestamp(beginTimestamp_) |
| 251 | {} |
| 252 | StackEntry() : region(NULL), location(NULL), beginTimestamp(-1) {} |
| 253 | }; |
| 254 | std::deque<StackEntry> stack; |
| 255 | |
| 256 | int regionDepth; // functions only (no named regions) |
| 257 | int regionDepthOpenCV; // functions from OpenCV library |
| 258 | |
| 259 | RegionStatistics stat; |
| 260 | RegionStatisticsStatus stat_status; |
| 261 | |
| 262 | StackEntry dummy_stack_top; // parallel_for root region |
| 263 | RegionStatistics parallel_for_stat; |
| 264 | RegionStatisticsStatus parallel_for_stat_status; |
| 265 | size_t parallel_for_stack_size; |
| 266 | |
| 267 | |
| 268 | mutable cv::Ptr<TraceStorage> storage; |
| 269 | |
| 270 | TraceManagerThreadLocal() : |
| 271 | threadID(cv::utils::getThreadID()), |
| 272 | region_counter(0), totalSkippedEvents(0), |
| 273 | currentActiveRegion(NULL), |
| 274 | regionDepth(0), |
| 275 | regionDepthOpenCV(0), |
| 276 | parallel_for_stack_size(0) |
| 277 | { |
| 278 | } |
| 279 | |
| 280 | ~TraceManagerThreadLocal(); |
| 281 | |
| 282 | TraceStorage* getStorage() const; |
| 283 | |
| 284 | void recordLocation(const Region::LocationStaticStorage& location); |
| 285 | void recordRegionEnter(const Region& region); |
| 286 | void recordRegionLeave(const Region& region, const RegionStatistics& result); |
| 287 | void recordRegionArg(const Region& region, const TraceArg& arg, const char& value); |
| 288 | |
| 289 | inline void stackPush(Region* region, const Region::LocationStaticStorage* location, int64 beginTimestamp) |
| 290 | { |
| 291 | stack.push_back(x: StackEntry(region, location, beginTimestamp)); |
| 292 | } |
| 293 | inline Region* stackTopRegion() const |
| 294 | { |
| 295 | if (stack.empty()) |
| 296 | return dummy_stack_top.region; |
| 297 | return stack.back().region; |
| 298 | } |
| 299 | inline const Region::LocationStaticStorage* stackTopLocation() const |
| 300 | { |
| 301 | if (stack.empty()) |
| 302 | return dummy_stack_top.location; |
| 303 | return stack.back().location; |
| 304 | } |
| 305 | inline int64 stackTopBeginTimestamp() const |
| 306 | { |
| 307 | if (stack.empty()) |
| 308 | return dummy_stack_top.beginTimestamp; |
| 309 | return stack.back().beginTimestamp; |
| 310 | } |
| 311 | inline void stackPop() |
| 312 | { |
| 313 | CV_DbgAssert(!stack.empty()); |
| 314 | stack.pop_back(); |
| 315 | } |
| 316 | void dumpStack(std::ostream& out, bool onlyFunctions) const; |
| 317 | |
| 318 | inline Region* getCurrentActiveRegion() |
| 319 | { |
| 320 | return currentActiveRegion; |
| 321 | } |
| 322 | |
| 323 | inline int getCurrentDepth() const { return (int)stack.size(); } |
| 324 | }; |
| 325 | |
//! Owner of the tracing state: the thread-local contexts (`tls`) and the
//! TraceStorage pointer `trace_storage`. Instances cannot be copied.
class CV_EXPORTS TraceManager
{
public:
    TraceManager();
    ~TraceManager();

    static bool isActivated();

    // NOTE(review): what each mutex protects is not visible in this header -
    // see the implementation file.
    Mutex mutexCreate;
    Mutex mutexCount;

    // Per-thread TraceManagerThreadLocal instances.
    TLSDataAccumulator<TraceManagerThreadLocal> tls;

    cv::Ptr<TraceStorage> trace_storage;
private:
    // disable copying (declared private and left without an in-class definition)
    TraceManager(const TraceManager&);
    TraceManager& operator=(const TraceManager&);
};
| 345 | |
//! Returns a reference to the TraceManager instance (defined in the implementation file).
CV_EXPORTS TraceManager& getTraceManager();
| 347 | inline Region* getCurrentActiveRegion() { return getTraceManager().tls.get()->getCurrentActiveRegion(); } |
| 348 | inline Region* getCurrentRegion() { return getTraceManager().tls.get()->stackTopRegion(); } |
| 349 | |
// Hooks for tracing parallel_for bodies; declared here, defined in the
// implementation file.
// NOTE(review): judging by the names, the root region/context is the one that
// launched the parallel_for, and the other two calls attach per-worker nested
// regions and finish the root region - confirm against the definitions.
void parallelForSetRootRegion(const Region& rootRegion, const TraceManagerThreadLocal& root_ctx);
void parallelForAttachNestedRegion(const Region& rootRegion);
void parallelForFinalize(const Region& rootRegion);
| 353 | |
| 354 | |
| 355 | |
| 356 | |
| 357 | |
| 358 | |
| 359 | |
| 360 | struct Region:: |
| 361 | { |
| 362 | int ; // 0 - region is disabled |
| 363 | #ifdef OPENCV_WITH_ITT |
| 364 | // Special fields for ITT |
| 365 | __itt_string_handle* volatile ittHandle_name; |
| 366 | __itt_string_handle* volatile ittHandle_filename; |
| 367 | #endif |
| 368 | (const LocationStaticStorage& location); |
| 369 | |
| 370 | static Region::LocationExtraData* (const Region::LocationStaticStorage& location); |
| 371 | }; |
| 372 | |
//! Implementation object of a Region: links the region to its location,
//! parent region and thread, and carries its timestamps.
class Region::Impl
{
public:
    const LocationStaticStorage& location;

    Region& region;               // the Region this object belongs to
    Region* const parentRegion;

    const int threadID;
    const int global_region_id;

    const int64 beginTimestamp;   // fixed at construction
    int64 endTimestamp;

    int directChildrenCount;

    // Identifiers of the code path: plain, IPP, OpenCL or OpenVX.
    enum OptimizationPath {
        CODE_PATH_PLAIN = 0,
        CODE_PATH_IPP,
        CODE_PATH_OPENCL,
        CODE_PATH_OPENVX
    };

#ifdef OPENCV_WITH_ITT
    bool itt_id_registered;
    __itt_id itt_id;
#endif

    Impl(TraceManagerThreadLocal& ctx, Region* parentRegion_, Region& region_, const LocationStaticStorage& location_, int64 beginTimestamp_);

    void enterRegion(TraceManagerThreadLocal& ctx);
    void leaveRegion(TraceManagerThreadLocal& ctx);

    void registerRegion(TraceManagerThreadLocal& ctx);

    void release();
protected:
    // Not publicly destructible. NOTE(review): presumably instances are
    // disposed of through release() - confirm in the implementation file.
    ~Impl();
};
| 412 | |
| 413 | |
| 414 | |
| 415 | }}}} // namespace |
| 416 | |
| 417 | //! @endcond |
| 418 | |
| 419 | #endif |
| 420 | |
| 421 | #endif // OPENCV_TRACE_PRIVATE_HPP |
| 422 | |