1 | // This file is part of OpenCV project. |
2 | // It is subject to the license terms in the LICENSE file found in the top-level directory |
3 | // of this distribution and at http://opencv.org/license.html. |
4 | |
5 | #ifndef OPENCV_TRACE_PRIVATE_HPP |
6 | #define OPENCV_TRACE_PRIVATE_HPP |
7 | |
8 | #ifdef OPENCV_TRACE |
9 | |
10 | #include <opencv2/core/utils/logger.hpp> |
11 | |
12 | #include <opencv2/core/utils/tls.hpp> |
13 | |
14 | #include "trace.hpp" |
15 | |
16 | //! @cond IGNORED |
17 | |
18 | #include <deque> |
19 | #include <ostream> |
20 | |
21 | #define INTEL_ITTNOTIFY_API_PRIVATE 1 |
22 | #ifdef OPENCV_WITH_ITT |
23 | #include "ittnotify.h" |
24 | #endif |
25 | |
// DEBUG_ONLY(...): expands to its arguments when _DEBUG is defined and to the
// no-op expression (void)0 otherwise. A pre-existing definition is left untouched.
#if !defined(DEBUG_ONLY)
#  if defined(_DEBUG)
#    define DEBUG_ONLY(...) __VA_ARGS__
#  else
#    define DEBUG_ONLY(...) (void)0
#  endif
#endif
33 | |
// DEBUG_ONLY_(...): expands to its arguments when _DEBUG is defined and to nothing
// at all otherwise (unlike DEBUG_ONLY, which leaves a (void)0 expression behind).
// A pre-existing definition is left untouched.
#if !defined(DEBUG_ONLY_)
#  if defined(_DEBUG)
#    define DEBUG_ONLY_(...) __VA_ARGS__
#  else
#    define DEBUG_ONLY_(...)
#  endif
#endif
41 | |
42 | |
43 | namespace cv { |
44 | namespace utils { |
45 | namespace trace { |
46 | namespace details { |
47 | |
// Helper macro: emits CV__TRACE_DEFINE_LOCATION_FN(name, flags) and then declares a
// const Region object named __region_fn, constructed from the location variable
// CV__TRACE_LOCATION_VARNAME(fn).
// Note: the expansion already ends with ';', so a ';' written after the macro
// invocation yields an extra empty statement.
#define CV__TRACE_OPENCV_FUNCTION_NAME_(name, flags) \
    CV__TRACE_DEFINE_LOCATION_FN(name, flags); \
    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
51 | |
52 | |
//! Single-bit flags used for trace regions.
enum RegionFlag
{
    REGION_FLAG__NEED_STACK_POP = 0x01,  // bit 0
    REGION_FLAG__ACTIVE         = 0x02,  // bit 1

    // Sentinel equal to INT_MAX; per its name it is present only to force an
    // int-sized enumeration and is not a real flag.
    ENUM_REGION_FLAG_IMPL_FORCE_INT = INT_MAX
};
59 | |
60 | |
// Forward declaration: only referenced by const reference in this header.
class TraceMessage;

//! Abstract interface of a destination for trace messages.
class TraceStorage {
public:
    TraceStorage() {}
    // Virtual destructor: the class is meant to be used through base pointers
    // (see the cv::Ptr<TraceStorage> members elsewhere in this header).
    virtual ~TraceStorage() {}

    //! Hands one trace message to the storage; must be implemented by derived classes.
    //! NOTE(review): the meaning of the returned bool is not visible in this header
    //! (presumably "message accepted") - confirm against the implementations.
    virtual bool put(const TraceMessage& msg) const = 0;
};
70 | |
//! Counters collected for a trace region: number of skipped regions plus the
//! total duration and, when the corresponding backend is compiled in, the
//! duration attributed to the IPP / OpenCL / OpenVX code paths.
struct RegionStatistics
{
    int currentSkippedRegions;

    int64 duration;
#ifdef HAVE_IPP
    int64 durationImplIPP;
#endif
#ifdef HAVE_OPENCL
    int64 durationImplOpenCL;
#endif
#ifdef HAVE_OPENVX
    int64 durationImplOpenVX;
#endif

    //! All counters start at zero.
    RegionStatistics() :
        currentSkippedRegions(0),
        duration(0)
#ifdef HAVE_IPP
        ,durationImplIPP(0)
#endif
#ifdef HAVE_OPENCL
        ,durationImplOpenCL(0)
#endif
#ifdef HAVE_OPENVX
        ,durationImplOpenVX(0)
#endif
    {}

    //! Transfers every counter into @p result (overwriting its previous values)
    //! and resets the counters of this object to zero.
    void grab(RegionStatistics& result)
    {
        result.currentSkippedRegions = currentSkippedRegions; currentSkippedRegions = 0;
        result.duration = duration; duration = 0;
#ifdef HAVE_IPP
        result.durationImplIPP = durationImplIPP; durationImplIPP = 0;
#endif
#ifdef HAVE_OPENCL
        result.durationImplOpenCL = durationImplOpenCL; durationImplOpenCL = 0;
#endif
#ifdef HAVE_OPENVX
        result.durationImplOpenVX = durationImplOpenVX; durationImplOpenVX = 0;
#endif
    }

    //! Adds every counter of @p stat to this object. @p stat is only read,
    //! hence it is accepted by const reference.
    void append(const RegionStatistics& stat)
    {
        currentSkippedRegions += stat.currentSkippedRegions;
        duration += stat.duration;
#ifdef HAVE_IPP
        durationImplIPP += stat.durationImplIPP;
#endif
#ifdef HAVE_OPENCL
        durationImplOpenCL += stat.durationImplOpenCL;
#endif
#ifdef HAVE_OPENVX
        durationImplOpenVX += stat.durationImplOpenVX;
#endif
    }

    //! Scales the duration counters by @p c, truncating toward zero.
    //! currentSkippedRegions is not scaled.
    void multiply(const float c)
    {
        // Scale in double precision: int64 * float would be evaluated in float,
        // whose 24-bit mantissa corrupts any duration above 2^24.
        const double factor = c;
        duration = static_cast<int64>(static_cast<double>(duration) * factor);
#ifdef HAVE_IPP
        durationImplIPP = static_cast<int64>(static_cast<double>(durationImplIPP) * factor);
#endif
#ifdef HAVE_OPENCL
        durationImplOpenCL = static_cast<int64>(static_cast<double>(durationImplOpenCL) * factor);
#endif
#ifdef HAVE_OPENVX
        durationImplOpenVX = static_cast<int64>(static_cast<double>(durationImplOpenVX) * factor);
#endif
    }
};
144 | |
145 | static inline |
146 | std::ostream& operator<<(std::ostream& out, const RegionStatistics& stat) |
147 | { |
148 | out << "skip=" << stat.currentSkippedRegions |
149 | << " duration=" << stat.duration |
150 | #ifdef HAVE_IPP |
151 | << " durationImplIPP=" << stat.durationImplIPP |
152 | #endif |
153 | #ifdef HAVE_OPENCL |
154 | << " durationImplOpenCL=" << stat.durationImplOpenCL |
155 | #endif |
156 | #ifdef HAVE_OPENVX |
157 | << " durationImplOpenVX=" << stat.durationImplOpenVX |
158 | #endif |
159 | ; |
160 | return out; |
161 | } |
162 | |
//! Bookkeeping that accompanies RegionStatistics: the skip-mode depth and the
//! per-backend "ignore" depths (present only when the backend is compiled in).
struct RegionStatisticsStatus
{
    int _skipDepth;  // -1 after reset(); values >= 0 are treated as "skip mode on"
#ifdef HAVE_IPP
    int ignoreDepthImplIPP;
#endif
#ifdef HAVE_OPENCL
    int ignoreDepthImplOpenCL;
#endif
#ifdef HAVE_OPENVX
    int ignoreDepthImplOpenVX;
#endif

    RegionStatisticsStatus() { reset(); }

    //! Restores the initial state: _skipDepth = -1, all ignore depths = 0.
    void reset()
    {
        _skipDepth = -1;
#ifdef HAVE_IPP
        ignoreDepthImplIPP = 0;
#endif
#ifdef HAVE_OPENCL
        ignoreDepthImplOpenCL = 0;
#endif
#ifdef HAVE_OPENVX
        ignoreDepthImplOpenVX = 0;
#endif
    }

    //! Derives this status from @p src: _skipDepth is cleared to -1 and, if @p src
    //! has _skipDepth >= 0, enableSkipMode(0) is called; each ignore depth becomes
    //! 1 when the corresponding depth of @p src is non-zero and 0 otherwise.
    void propagateFrom(const RegionStatisticsStatus& src)
    {
        _skipDepth = -1;
        if (src._skipDepth >= 0)
            enableSkipMode(0);
#ifdef HAVE_IPP
        ignoreDepthImplIPP = src.ignoreDepthImplIPP ? 1 : 0;
#endif
#ifdef HAVE_OPENCL
        ignoreDepthImplOpenCL = src.ignoreDepthImplOpenCL ? 1 : 0;
#endif
#ifdef HAVE_OPENVX
        ignoreDepthImplOpenVX = src.ignoreDepthImplOpenVX ? 1 : 0;
#endif
    }

    // Defined outside this header.
    void enableSkipMode(int depth);
    void checkResetSkipMode(int leaveDepth);
};
211 | |
212 | static inline |
213 | std::ostream& operator<<(std::ostream& out, const RegionStatisticsStatus& s) |
214 | { |
215 | out << "ignore={" ; |
216 | if (s._skipDepth >= 0) |
217 | out << " SKIP=" << s._skipDepth; |
218 | #ifdef HAVE_IPP |
219 | if (s.ignoreDepthImplIPP) |
220 | out << " IPP=" << s.ignoreDepthImplIPP; |
221 | #endif |
222 | #ifdef HAVE_OPENCL |
223 | if (s.ignoreDepthImplOpenCL) |
224 | out << " OpenCL=" << s.ignoreDepthImplOpenCL; |
225 | #endif |
226 | #ifdef HAVE_OPENVX |
227 | if (s.ignoreDepthImplOpenVX) |
228 | out << " OpenVX=" << s.ignoreDepthImplOpenVX; |
229 | #endif |
230 | out << "}" ; |
231 | return out; |
232 | } |
233 | |
234 | //! TraceManager for local thread |
235 | struct TraceManagerThreadLocal |
236 | { |
237 | const int threadID; |
238 | int region_counter; |
239 | |
240 | size_t totalSkippedEvents; |
241 | |
242 | Region* currentActiveRegion; |
243 | |
244 | struct StackEntry |
245 | { |
246 | Region* region; |
247 | const Region::LocationStaticStorage* location; |
248 | int64 beginTimestamp; |
249 | StackEntry(Region* region_, const Region::LocationStaticStorage* location_, int64 beginTimestamp_) : |
250 | region(region_), location(location_), beginTimestamp(beginTimestamp_) |
251 | {} |
252 | StackEntry() : region(NULL), location(NULL), beginTimestamp(-1) {} |
253 | }; |
254 | std::deque<StackEntry> stack; |
255 | |
256 | int regionDepth; // functions only (no named regions) |
257 | int regionDepthOpenCV; // functions from OpenCV library |
258 | |
259 | RegionStatistics stat; |
260 | RegionStatisticsStatus stat_status; |
261 | |
262 | StackEntry dummy_stack_top; // parallel_for root region |
263 | RegionStatistics parallel_for_stat; |
264 | RegionStatisticsStatus parallel_for_stat_status; |
265 | size_t parallel_for_stack_size; |
266 | |
267 | |
268 | mutable cv::Ptr<TraceStorage> storage; |
269 | |
270 | TraceManagerThreadLocal() : |
271 | threadID(cv::utils::getThreadID()), |
272 | region_counter(0), totalSkippedEvents(0), |
273 | currentActiveRegion(NULL), |
274 | regionDepth(0), |
275 | regionDepthOpenCV(0), |
276 | parallel_for_stack_size(0) |
277 | { |
278 | } |
279 | |
280 | ~TraceManagerThreadLocal(); |
281 | |
282 | TraceStorage* getStorage() const; |
283 | |
284 | void recordLocation(const Region::LocationStaticStorage& location); |
285 | void recordRegionEnter(const Region& region); |
286 | void recordRegionLeave(const Region& region, const RegionStatistics& result); |
287 | void recordRegionArg(const Region& region, const TraceArg& arg, const char& value); |
288 | |
289 | inline void stackPush(Region* region, const Region::LocationStaticStorage* location, int64 beginTimestamp) |
290 | { |
291 | stack.push_back(x: StackEntry(region, location, beginTimestamp)); |
292 | } |
293 | inline Region* stackTopRegion() const |
294 | { |
295 | if (stack.empty()) |
296 | return dummy_stack_top.region; |
297 | return stack.back().region; |
298 | } |
299 | inline const Region::LocationStaticStorage* stackTopLocation() const |
300 | { |
301 | if (stack.empty()) |
302 | return dummy_stack_top.location; |
303 | return stack.back().location; |
304 | } |
305 | inline int64 stackTopBeginTimestamp() const |
306 | { |
307 | if (stack.empty()) |
308 | return dummy_stack_top.beginTimestamp; |
309 | return stack.back().beginTimestamp; |
310 | } |
311 | inline void stackPop() |
312 | { |
313 | CV_DbgAssert(!stack.empty()); |
314 | stack.pop_back(); |
315 | } |
316 | void dumpStack(std::ostream& out, bool onlyFunctions) const; |
317 | |
318 | inline Region* getCurrentActiveRegion() |
319 | { |
320 | return currentActiveRegion; |
321 | } |
322 | |
323 | inline int getCurrentDepth() const { return (int)stack.size(); } |
324 | }; |
325 | |
//! Trace manager: groups the mutexes, the per-thread contexts (tls) and the
//! TraceStorage pointer declared below. Instances are not copyable.
class CV_EXPORTS TraceManager
{
public:
    TraceManager();
    ~TraceManager();

    static bool isActivated();

    // NOTE(review): what each mutex guards is not visible in this header - see the implementation.
    Mutex mutexCreate;
    Mutex mutexCount;

    // Holds one TraceManagerThreadLocal per thread; accessed through tls.get().
    TLSDataAccumulator<TraceManagerThreadLocal> tls;

    cv::Ptr<TraceStorage> trace_storage;
private:
    // disable copying (declared but intentionally not defined)
    TraceManager(const TraceManager&);
    TraceManager& operator=(const TraceManager&);
};
345 | |
346 | CV_EXPORTS TraceManager& getTraceManager(); |
347 | inline Region* getCurrentActiveRegion() { return getTraceManager().tls.get()->getCurrentActiveRegion(); } |
348 | inline Region* getCurrentRegion() { return getTraceManager().tls.get()->stackTopRegion(); } |
349 | |
// Hooks connecting parallel_for execution with tracing; all three take the root
// region, and the implementations live outside this header.
// NOTE(review): exact call order/semantics are inferred from the names only
// (set root -> attach nested -> finalize) - confirm against the implementation.
void parallelForSetRootRegion(const Region& rootRegion, const TraceManagerThreadLocal& root_ctx);
void parallelForAttachNestedRegion(const Region& rootRegion);
void parallelForFinalize(const Region& rootRegion);
353 | |
354 | |
355 | |
356 | |
357 | |
358 | |
359 | |
360 | struct Region:: |
361 | { |
362 | int ; // 0 - region is disabled |
363 | #ifdef OPENCV_WITH_ITT |
364 | // Special fields for ITT |
365 | __itt_string_handle* volatile ittHandle_name; |
366 | __itt_string_handle* volatile ittHandle_filename; |
367 | #endif |
368 | (const LocationStaticStorage& location); |
369 | |
370 | static Region::LocationExtraData* (const Region::LocationStaticStorage& location); |
371 | }; |
372 | |
//! Implementation data of a Region: its location, owning/parent regions,
//! identifiers and timestamps. All member functions are defined outside this header.
class Region::Impl
{
public:
    const LocationStaticStorage& location;

    Region& region;              // the Region this Impl belongs to (constructor argument region_)
    Region* const parentRegion;

    const int threadID;
    const int global_region_id;

    const int64 beginTimestamp;  // fixed at construction
    int64 endTimestamp;

    int directChildrenCount;

    // Code path identifiers; the non-plain entries mirror the optional
    // IPP / OpenCL / OpenVX backends used elsewhere in this header.
    enum OptimizationPath {
        CODE_PATH_PLAIN = 0,
        CODE_PATH_IPP,
        CODE_PATH_OPENCL,
        CODE_PATH_OPENVX
    };

#ifdef OPENCV_WITH_ITT
    bool itt_id_registered;
    __itt_id itt_id;
#endif

    Impl(TraceManagerThreadLocal& ctx, Region* parentRegion_, Region& region_, const LocationStaticStorage& location_, int64 beginTimestamp_);

    void enterRegion(TraceManagerThreadLocal& ctx);
    void leaveRegion(TraceManagerThreadLocal& ctx);

    void registerRegion(TraceManagerThreadLocal& ctx);

    // NOTE(review): with the destructor protected, release() is presumably the
    // only way for outside code to dispose of an Impl - confirm in the implementation.
    void release();
protected:
    // Protected: instances cannot be destroyed directly by outside code.
    ~Impl();
};
412 | |
413 | |
414 | |
415 | }}}} // namespace |
416 | |
417 | //! @endcond |
418 | |
419 | #endif |
420 | |
421 | #endif // OPENCV_TRACE_PRIVATE_HPP |
422 | |