1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "default.h" |
7 | |
8 | namespace embree |
9 | { |
10 | /*! helper structure for the implementation of the profile functions below */ |
11 | struct ProfileTimer |
12 | { |
13 | static const size_t N = 20; |
14 | |
15 | ProfileTimer () {} |
16 | |
17 | ProfileTimer (const size_t numSkip) : i(0), j(0), maxJ(0), numSkip(numSkip), t0(0) |
18 | { |
19 | for (size_t i=0; i<N; i++) names[i] = nullptr; |
20 | for (size_t i=0; i<N; i++) dt_fst[i] = 0.0; |
21 | for (size_t i=0; i<N; i++) dt_min[i] = pos_inf; |
22 | for (size_t i=0; i<N; i++) dt_avg[i] = 0.0; |
23 | for (size_t i=0; i<N; i++) dt_max[i] = neg_inf; |
24 | } |
25 | |
26 | __forceinline void begin() |
27 | { |
28 | j=0; |
29 | t0 = tj = getSeconds(); |
30 | } |
31 | |
32 | __forceinline void end() { |
33 | absolute(name: "total" ); |
34 | i++; |
35 | } |
36 | |
37 | __forceinline void operator() (const char* name) { |
38 | relative(name); |
39 | } |
40 | |
41 | __forceinline void absolute (const char* name) |
42 | { |
43 | const double t1 = getSeconds(); |
44 | const double dt = t1-t0; |
45 | assert(names[j] == nullptr || names[j] == name); |
46 | names[j] = name; |
47 | if (i == 0) dt_fst[j] = dt; |
48 | if (i>=numSkip) { |
49 | dt_min[j] = min(a: dt_min[j],b: dt); |
50 | dt_avg[j] = dt_avg[j] + dt; |
51 | dt_max[j] = max(a: dt_max[j],b: dt); |
52 | } |
53 | j++; |
54 | maxJ = max(a: maxJ,b: j); |
55 | } |
56 | |
57 | __forceinline void relative (const char* name) |
58 | { |
59 | const double t1 = getSeconds(); |
60 | const double dt = t1-tj; |
61 | tj = t1; |
62 | assert(names[j] == nullptr || names[j] == name); |
63 | names[j] = name; |
64 | if (i == 0) dt_fst[j] = dt; |
65 | if (i>=numSkip) { |
66 | dt_min[j] = min(a: dt_min[j],b: dt); |
67 | dt_avg[j] = dt_avg[j] + dt; |
68 | dt_max[j] = max(a: dt_max[j],b: dt); |
69 | } |
70 | j++; |
71 | maxJ = max(a: maxJ,b: j); |
72 | } |
73 | |
74 | void print(size_t numElements) |
75 | { |
76 | for (size_t k=0; k<N; k++) |
77 | dt_avg[k] /= double(i-numSkip); |
78 | |
79 | printf(format: " profile [M/s]:\n" ); |
80 | for (size_t j=0; j<maxJ; j++) |
81 | printf(format: "%20s: fst = %7.2f M/s, min = %7.2f M/s, avg = %7.2f M/s, max = %7.2f M/s\n" , |
82 | names[j],numElements/dt_fst[j]*1E-6,numElements/dt_max[j]*1E-6,numElements/dt_avg[j]*1E-6,numElements/dt_min[j]*1E-6); |
83 | |
84 | printf(format: " profile [ms]:\n" ); |
85 | for (size_t j=0; j<maxJ; j++) |
86 | printf(format: "%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n" , |
87 | names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]); |
88 | } |
89 | |
90 | void print() |
91 | { |
92 | printf(format: " profile:\n" ); |
93 | |
94 | for (size_t k=0; k<N; k++) |
95 | dt_avg[k] /= double(i-numSkip); |
96 | |
97 | for (size_t j=0; j<maxJ; j++) { |
98 | printf(format: "%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n" , |
99 | names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]); |
100 | } |
101 | } |
102 | |
103 | double avg() { |
104 | return dt_avg[maxJ-1]/double(i-numSkip); |
105 | } |
106 | |
107 | private: |
108 | size_t i; |
109 | size_t j; |
110 | size_t maxJ; |
111 | size_t numSkip; |
112 | double t0; |
113 | double tj; |
114 | const char* names[N]; |
115 | double dt_fst[N]; |
116 | double dt_min[N]; |
117 | double dt_avg[N]; |
118 | double dt_max[N]; |
119 | }; |
120 | |
121 | /*! This function executes some code block multiple times and measured sections of it. |
122 | Use the following way: |
123 | |
124 | profile(1,10,1000,[&](ProfileTimer& timer) { |
125 | // code |
126 | timer("A"); |
127 | // code |
128 | timer("B"); |
129 | }); |
130 | */ |
131 | template<typename Closure> |
132 | void profile(const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure) |
133 | { |
134 | ProfileTimer timer(numSkip); |
135 | |
136 | for (size_t i=0; i<numSkip+numIter; i++) |
137 | { |
138 | timer.begin(); |
139 | closure(timer); |
140 | timer.end(); |
141 | } |
142 | timer.print(numElements); |
143 | } |
144 | |
145 | /*! similar as the function above, but the timer object comes externally */ |
146 | template<typename Closure> |
147 | void profile(ProfileTimer& timer, const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure) |
148 | { |
149 | timer = ProfileTimer(numSkip); |
150 | |
151 | for (size_t i=0; i<numSkip+numIter; i++) |
152 | { |
153 | timer.begin(); |
154 | closure(timer); |
155 | timer.end(); |
156 | } |
157 | timer.print(numElements); |
158 | } |
159 | } |
160 | |