1 | // This file is part of OpenCV project. |
2 | // It is subject to the license terms in the LICENSE file found in the top-level directory |
3 | // of this distribution and at http://opencv.org/license.html. |
4 | |
5 | #include "../precomp.hpp" |
6 | |
7 | #ifdef HAVE_OPENCV_DNN |
8 | #include "opencv2/dnn.hpp" |
9 | #endif |
10 | |
11 | namespace cv { |
12 | |
13 | TrackerDaSiamRPN::TrackerDaSiamRPN() |
14 | { |
15 | // nothing |
16 | } |
17 | |
18 | TrackerDaSiamRPN::~TrackerDaSiamRPN() |
19 | { |
20 | // nothing |
21 | } |
22 | |
23 | TrackerDaSiamRPN::Params::Params() |
24 | { |
25 | model = "dasiamrpn_model.onnx" ; |
26 | kernel_cls1 = "dasiamrpn_kernel_cls1.onnx" ; |
27 | kernel_r1 = "dasiamrpn_kernel_r1.onnx" ; |
28 | #ifdef HAVE_OPENCV_DNN |
29 | backend = dnn::DNN_BACKEND_DEFAULT; |
30 | target = dnn::DNN_TARGET_CPU; |
31 | #else |
32 | backend = -1; // invalid value |
33 | target = -1; // invalid value |
34 | #endif |
35 | } |
36 | |
37 | #ifdef HAVE_OPENCV_DNN |
38 | |
// Returns sqrt((w + p) * (h + p)) where p = (w + h) / 2, i.e. the side of the
// square whose area equals the box area after padding both sides by p.
template <typename T> static
T sizeCal(const T& w, const T& h)
{
    const T halfSum = (w + h) * T(0.5);
    const T paddedArea = (w + halfSum) * (h + halfSum);
    return sqrt(paddedArea);
}
46 | |
47 | template <> |
48 | Mat sizeCal(const Mat& w, const Mat& h) |
49 | { |
50 | Mat pad = (w + h) * 0.5; |
51 | Mat sz2 = (w + pad).mul(e: (h + pad)); |
52 | |
53 | cv::sqrt(src: sz2, dst: sz2); |
54 | return sz2; |
55 | } |
56 | |
57 | class TrackerDaSiamRPNImpl : public TrackerDaSiamRPN |
58 | { |
59 | public: |
60 | TrackerDaSiamRPNImpl(const TrackerDaSiamRPN::Params& parameters) |
61 | : params(parameters) |
62 | { |
63 | |
64 | siamRPN = dnn::readNet(model: params.model); |
65 | siamKernelCL1 = dnn::readNet(model: params.kernel_cls1); |
66 | siamKernelR1 = dnn::readNet(model: params.kernel_r1); |
67 | |
68 | CV_Assert(!siamRPN.empty()); |
69 | CV_Assert(!siamKernelCL1.empty()); |
70 | CV_Assert(!siamKernelR1.empty()); |
71 | |
72 | siamRPN.setPreferableBackend(params.backend); |
73 | siamRPN.setPreferableTarget(params.target); |
74 | siamKernelR1.setPreferableBackend(params.backend); |
75 | siamKernelR1.setPreferableTarget(params.target); |
76 | siamKernelCL1.setPreferableBackend(params.backend); |
77 | siamKernelCL1.setPreferableTarget(params.target); |
78 | } |
79 | |
80 | void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE; |
81 | bool update(InputArray image, Rect& boundingBox) CV_OVERRIDE; |
82 | float getTrackingScore() CV_OVERRIDE; |
83 | |
84 | TrackerDaSiamRPN::Params params; |
85 | |
86 | protected: |
87 | dnn::Net siamRPN, siamKernelR1, siamKernelCL1; |
88 | Rect boundingBox_; |
89 | Mat image_; |
90 | struct trackerConfig |
91 | { |
92 | float windowInfluence = 0.43f; |
93 | float lr = 0.4f; |
94 | int scale = 8; |
95 | bool swapRB = false; |
96 | int totalStride = 8; |
97 | float penaltyK = 0.055f; |
98 | int exemplarSize = 127; |
99 | int instanceSize = 271; |
100 | float contextAmount = 0.5f; |
101 | std::vector<float> ratios = { 0.33f, 0.5f, 1.0f, 2.0f, 3.0f }; |
102 | int anchorNum = int(ratios.size()); |
103 | Mat anchors; |
104 | Mat windows; |
105 | Scalar avgChans; |
106 | Size imgSize = { 0, 0 }; |
107 | Rect2f targetBox = { 0, 0, 0, 0 }; |
108 | int scoreSize = (instanceSize - exemplarSize) / totalStride + 1; |
109 | float tracking_score; |
110 | |
111 | void update_scoreSize() |
112 | { |
113 | scoreSize = int((instanceSize - exemplarSize) / totalStride + 1); |
114 | } |
115 | }; |
116 | trackerConfig trackState; |
117 | |
118 | void softmax(const Mat& src, Mat& dst); |
119 | void elementMax(Mat& src); |
120 | Mat generateHanningWindow(); |
121 | Mat generateAnchors(); |
122 | Mat getSubwindow(Mat& img, const Rect2f& targetBox, float originalSize, Scalar avgChans); |
123 | void trackerInit(Mat img); |
124 | void trackerEval(Mat img); |
125 | }; |
126 | |
127 | void TrackerDaSiamRPNImpl::init(InputArray image, const Rect& boundingBox) |
128 | { |
129 | image_ = image.getMat().clone(); |
130 | |
131 | trackState.update_scoreSize(); |
132 | trackState.targetBox = Rect2f( |
133 | float(boundingBox.x) + float(boundingBox.width) * 0.5f, // FIXIT don't use center in Rect structures, it is confusing |
134 | float(boundingBox.y) + float(boundingBox.height) * 0.5f, |
135 | float(boundingBox.width), |
136 | float(boundingBox.height) |
137 | ); |
138 | trackerInit(img: image_); |
139 | } |
140 | |
141 | void TrackerDaSiamRPNImpl::trackerInit(Mat img) |
142 | { |
143 | Rect2f targetBox = trackState.targetBox; |
144 | Mat anchors = generateAnchors(); |
145 | trackState.anchors = anchors; |
146 | |
147 | Mat windows = generateHanningWindow(); |
148 | |
149 | trackState.windows = windows; |
150 | trackState.imgSize = img.size(); |
151 | |
152 | trackState.avgChans = mean(src: img); |
153 | float wc = targetBox.width + trackState.contextAmount * (targetBox.width + targetBox.height); |
154 | float hc = targetBox.height + trackState.contextAmount * (targetBox.width + targetBox.height); |
155 | float sz = (float)cvRound(value: sqrt(x: wc * hc)); |
156 | |
157 | Mat zCrop = getSubwindow(img, targetBox, originalSize: sz, avgChans: trackState.avgChans); |
158 | Mat blob; |
159 | |
160 | dnn::blobFromImage(image: zCrop, blob, scalefactor: 1.0, size: Size(trackState.exemplarSize, trackState.exemplarSize), mean: Scalar(), swapRB: trackState.swapRB, crop: false, CV_32F); |
161 | siamRPN.setInput(blob); |
162 | Mat out1; |
163 | siamRPN.forward(outputBlobs: out1, outputName: "onnx_node_output_0!63" ); |
164 | |
165 | siamKernelCL1.setInput(blob: out1); |
166 | siamKernelR1.setInput(blob: out1); |
167 | |
168 | Mat cls1 = siamKernelCL1.forward(); |
169 | Mat r1 = siamKernelR1.forward(); |
170 | std::vector<int> r1_shape = { 20, 256, 4, 4 }, cls1_shape = { 10, 256, 4, 4 }; |
171 | |
172 | siamRPN.setParam(layer: siamRPN.getLayerId(layer: "onnx_node_output_0!65" ), numParam: 0, blob: r1.reshape(cn: 0, newshape: r1_shape)); |
173 | siamRPN.setParam(layer: siamRPN.getLayerId(layer: "onnx_node_output_0!68" ), numParam: 0, blob: cls1.reshape(cn: 0, newshape: cls1_shape)); |
174 | } |
175 | |
176 | bool TrackerDaSiamRPNImpl::update(InputArray image, Rect& boundingBox) |
177 | { |
178 | image_ = image.getMat().clone(); |
179 | trackerEval(img: image_); |
180 | boundingBox = { |
181 | int(trackState.targetBox.x - int(trackState.targetBox.width / 2)), |
182 | int(trackState.targetBox.y - int(trackState.targetBox.height / 2)), |
183 | int(trackState.targetBox.width), |
184 | int(trackState.targetBox.height) |
185 | }; |
186 | return true; |
187 | } |
188 | |
189 | void TrackerDaSiamRPNImpl::trackerEval(Mat img) |
190 | { |
191 | Rect2f targetBox = trackState.targetBox; |
192 | |
193 | float wc = targetBox.height + trackState.contextAmount * (targetBox.width + targetBox.height); |
194 | float hc = targetBox.width + trackState.contextAmount * (targetBox.width + targetBox.height); |
195 | |
196 | float sz = sqrt(x: wc * hc); |
197 | float scaleZ = trackState.exemplarSize / sz; |
198 | |
199 | float searchSize = float((trackState.instanceSize - trackState.exemplarSize) / 2); |
200 | float pad = searchSize / scaleZ; |
201 | float sx = sz + 2 * pad; |
202 | |
203 | Mat xCrop = getSubwindow(img, targetBox, originalSize: (float)cvRound(value: sx), avgChans: trackState.avgChans); |
204 | |
205 | Mat blob; |
206 | std::vector<Mat> outs; |
207 | std::vector<String> outNames; |
208 | Mat delta, score; |
209 | Mat sc, rc, penalty, pscore; |
210 | |
211 | dnn::blobFromImage(image: xCrop, blob, scalefactor: 1.0, size: Size(trackState.instanceSize, trackState.instanceSize), mean: Scalar(), swapRB: trackState.swapRB, crop: false, CV_32F); |
212 | |
213 | siamRPN.setInput(blob); |
214 | |
215 | outNames = siamRPN.getUnconnectedOutLayersNames(); |
216 | siamRPN.forward(outputBlobs: outs, outBlobNames: outNames); |
217 | |
218 | delta = outs[0]; |
219 | score = outs[1]; |
220 | |
221 | score = score.reshape(cn: 0, newshape: { 2, trackState.anchorNum, trackState.scoreSize, trackState.scoreSize }); |
222 | delta = delta.reshape(cn: 0, newshape: { 4, trackState.anchorNum, trackState.scoreSize, trackState.scoreSize }); |
223 | |
224 | softmax(src: score, dst&: score); |
225 | |
226 | targetBox.width *= scaleZ; |
227 | targetBox.height *= scaleZ; |
228 | |
229 | score = score.row(y: 1); |
230 | score = score.reshape(cn: 0, newshape: { 5, 19, 19 }); |
231 | |
232 | // Post processing |
233 | delta.row(y: 0) = delta.row(y: 0).mul(m: trackState.anchors.row(y: 2)) + trackState.anchors.row(y: 0); |
234 | delta.row(y: 1) = delta.row(y: 1).mul(m: trackState.anchors.row(y: 3)) + trackState.anchors.row(y: 1); |
235 | exp(src: delta.row(y: 2), dst: delta.row(y: 2)); |
236 | delta.row(y: 2) = delta.row(y: 2).mul(m: trackState.anchors.row(y: 2)); |
237 | exp(src: delta.row(y: 3), dst: delta.row(y: 3)); |
238 | delta.row(y: 3) = delta.row(y: 3).mul(m: trackState.anchors.row(y: 3)); |
239 | |
240 | sc = sizeCal(w: delta.row(y: 2), h: delta.row(y: 3)) / sizeCal(w: targetBox.width, h: targetBox.height); |
241 | elementMax(src&: sc); |
242 | |
243 | rc = delta.row(y: 2).mul(m: 1 / delta.row(y: 3)); |
244 | rc = (targetBox.width / targetBox.height) / rc; |
245 | elementMax(src&: rc); |
246 | |
247 | // Calculating the penalty |
248 | exp(src: ((rc.mul(m: sc) - 1.) * trackState.penaltyK * (-1.0)), dst: penalty); |
249 | penalty = penalty.reshape(cn: 0, newshape: { trackState.anchorNum, trackState.scoreSize, trackState.scoreSize }); |
250 | |
251 | pscore = penalty.mul(m: score); |
252 | pscore = pscore * (1.0 - trackState.windowInfluence) + trackState.windows * trackState.windowInfluence; |
253 | |
254 | int bestID[2] = { 0, 0 }; |
255 | // Find the index of best score. |
256 | minMaxIdx(src: pscore.reshape(cn: 0, newshape: { trackState.anchorNum * trackState.scoreSize * trackState.scoreSize, 1 }), minVal: 0, maxVal: 0, minIdx: 0, maxIdx: bestID); |
257 | delta = delta.reshape(cn: 0, newshape: { 4, trackState.anchorNum * trackState.scoreSize * trackState.scoreSize }); |
258 | penalty = penalty.reshape(cn: 0, newshape: { trackState.anchorNum * trackState.scoreSize * trackState.scoreSize, 1 }); |
259 | score = score.reshape(cn: 0, newshape: { trackState.anchorNum * trackState.scoreSize * trackState.scoreSize, 1 }); |
260 | |
261 | int index[2] = { 0, bestID[0] }; |
262 | Rect2f resBox = { 0, 0, 0, 0 }; |
263 | |
264 | resBox.x = delta.at<float>(idx: index) / scaleZ; |
265 | index[0] = 1; |
266 | resBox.y = delta.at<float>(idx: index) / scaleZ; |
267 | index[0] = 2; |
268 | resBox.width = delta.at<float>(idx: index) / scaleZ; |
269 | index[0] = 3; |
270 | resBox.height = delta.at<float>(idx: index) / scaleZ; |
271 | |
272 | float lr = penalty.at<float>(idx: bestID) * score.at<float>(idx: bestID) * trackState.lr; |
273 | |
274 | resBox.x = resBox.x + targetBox.x; |
275 | resBox.y = resBox.y + targetBox.y; |
276 | targetBox.width /= scaleZ; |
277 | targetBox.height /= scaleZ; |
278 | |
279 | resBox.width = targetBox.width * (1 - lr) + resBox.width * lr; |
280 | resBox.height = targetBox.height * (1 - lr) + resBox.height * lr; |
281 | |
282 | resBox.x = float(fmax(x: 0., y: fmin(x: float(trackState.imgSize.width), y: resBox.x))); |
283 | resBox.y = float(fmax(x: 0., y: fmin(x: float(trackState.imgSize.height), y: resBox.y))); |
284 | resBox.width = float(fmax(x: 10., y: fmin(x: float(trackState.imgSize.width), y: resBox.width))); |
285 | resBox.height = float(fmax(x: 10., y: fmin(x: float(trackState.imgSize.height), y: resBox.height))); |
286 | |
287 | trackState.targetBox = resBox; |
288 | trackState.tracking_score = score.at<float>(idx: bestID); |
289 | } |
290 | |
291 | float TrackerDaSiamRPNImpl::getTrackingScore() |
292 | { |
293 | return trackState.tracking_score; |
294 | } |
295 | |
296 | void TrackerDaSiamRPNImpl::softmax(const Mat& src, Mat& dst) |
297 | { |
298 | Mat maxVal; |
299 | cv::max(src1: src.row(y: 1), src2: src.row(y: 0), dst&: maxVal); |
300 | |
301 | src.row(y: 1) -= maxVal; |
302 | src.row(y: 0) -= maxVal; |
303 | |
304 | exp(src, dst); |
305 | |
306 | Mat sumVal = dst.row(y: 0) + dst.row(y: 1); |
307 | dst.row(y: 0) = dst.row(y: 0) / sumVal; |
308 | dst.row(y: 1) = dst.row(y: 1) / sumVal; |
309 | } |
310 | |
311 | void TrackerDaSiamRPNImpl::elementMax(Mat& src) |
312 | { |
313 | int* p = src.size.p; |
314 | int index[4] = { 0, 0, 0, 0 }; |
315 | for (int n = 0; n < *p; n++) |
316 | { |
317 | for (int k = 0; k < *(p + 1); k++) |
318 | { |
319 | for (int i = 0; i < *(p + 2); i++) |
320 | { |
321 | for (int j = 0; j < *(p + 3); j++) |
322 | { |
323 | index[0] = n, index[1] = k, index[2] = i, index[3] = j; |
324 | float& v = src.at<float>(idx: index); |
325 | v = fmax(x: v, y: 1.0f / v); |
326 | } |
327 | } |
328 | } |
329 | } |
330 | } |
331 | |
332 | Mat TrackerDaSiamRPNImpl::generateHanningWindow() |
333 | { |
334 | Mat baseWindows, HanningWindows; |
335 | |
336 | createHanningWindow(dst: baseWindows, winSize: Size(trackState.scoreSize, trackState.scoreSize), CV_32F); |
337 | baseWindows = baseWindows.reshape(cn: 0, newshape: { 1, trackState.scoreSize, trackState.scoreSize }); |
338 | HanningWindows = baseWindows.clone(); |
339 | for (int i = 1; i < trackState.anchorNum; i++) |
340 | { |
341 | HanningWindows.push_back(m: baseWindows); |
342 | } |
343 | |
344 | return HanningWindows; |
345 | } |
346 | |
347 | Mat TrackerDaSiamRPNImpl::generateAnchors() |
348 | { |
349 | int totalStride = trackState.totalStride, scales = trackState.scale, scoreSize = trackState.scoreSize; |
350 | std::vector<float> ratios = trackState.ratios; |
351 | std::vector<Rect2f> baseAnchors; |
352 | int anchorNum = int(ratios.size()); |
353 | int size = totalStride * totalStride; |
354 | |
355 | float ori = -(float(scoreSize / 2)) * float(totalStride); |
356 | |
357 | for (auto i = 0; i < anchorNum; i++) |
358 | { |
359 | int ws = int(sqrt(x: size / ratios[i])); |
360 | int hs = int(ws * ratios[i]); |
361 | |
362 | float wws = float(ws) * scales; |
363 | float hhs = float(hs) * scales; |
364 | Rect2f anchor = { 0, 0, wws, hhs }; |
365 | baseAnchors.push_back(x: anchor); |
366 | } |
367 | |
368 | int anchorIndex[4] = { 0, 0, 0, 0 }; |
369 | const int sizes[4] = { 4, (int)ratios.size(), scoreSize, scoreSize }; |
370 | Mat anchors(4, sizes, CV_32F); |
371 | |
372 | for (auto i = 0; i < scoreSize; i++) |
373 | { |
374 | for (auto j = 0; j < scoreSize; j++) |
375 | { |
376 | for (auto k = 0; k < anchorNum; k++) |
377 | { |
378 | anchorIndex[0] = 1, anchorIndex[1] = k, anchorIndex[2] = i, anchorIndex[3] = j; |
379 | anchors.at<float>(idx: anchorIndex) = ori + totalStride * i; |
380 | |
381 | anchorIndex[0] = 0; |
382 | anchors.at<float>(idx: anchorIndex) = ori + totalStride * j; |
383 | |
384 | anchorIndex[0] = 2; |
385 | anchors.at<float>(idx: anchorIndex) = baseAnchors[k].width; |
386 | |
387 | anchorIndex[0] = 3; |
388 | anchors.at<float>(idx: anchorIndex) = baseAnchors[k].height; |
389 | } |
390 | } |
391 | } |
392 | |
393 | return anchors; |
394 | } |
395 | |
396 | Mat TrackerDaSiamRPNImpl::getSubwindow(Mat& img, const Rect2f& targetBox, float originalSize, Scalar avgChans) |
397 | { |
398 | Mat zCrop, dst; |
399 | Size imgSize = img.size(); |
400 | float c = (originalSize + 1) / 2; |
401 | float xMin = (float)cvRound(value: targetBox.x - c); |
402 | float xMax = xMin + originalSize - 1; |
403 | float yMin = (float)cvRound(value: targetBox.y - c); |
404 | float yMax = yMin + originalSize - 1; |
405 | |
406 | int leftPad = (int)(fmax(x: 0., y: -xMin)); |
407 | int topPad = (int)(fmax(x: 0., y: -yMin)); |
408 | int rightPad = (int)(fmax(x: 0., y: xMax - imgSize.width + 1)); |
409 | int bottomPad = (int)(fmax(x: 0., y: yMax - imgSize.height + 1)); |
410 | |
411 | xMin = xMin + leftPad; |
412 | xMax = xMax + leftPad; |
413 | yMax = yMax + topPad; |
414 | yMin = yMin + topPad; |
415 | |
416 | if (topPad == 0 && bottomPad == 0 && leftPad == 0 && rightPad == 0) |
417 | { |
418 | img(Rect(int(xMin), int(yMin), int(xMax - xMin + 1), int(yMax - yMin + 1))).copyTo(m: zCrop); |
419 | } |
420 | else |
421 | { |
422 | copyMakeBorder(src: img, dst, top: topPad, bottom: bottomPad, left: leftPad, right: rightPad, borderType: BORDER_CONSTANT, value: avgChans); |
423 | dst(Rect(int(xMin), int(yMin), int(xMax - xMin + 1), int(yMax - yMin + 1))).copyTo(m: zCrop); |
424 | } |
425 | |
426 | return zCrop; |
427 | } |
428 | Ptr<TrackerDaSiamRPN> TrackerDaSiamRPN::create(const TrackerDaSiamRPN::Params& parameters) |
429 | { |
430 | return makePtr<TrackerDaSiamRPNImpl>(a1: parameters); |
431 | } |
432 | |
#else  // HAVE_OPENCV_DNN
434 | Ptr<TrackerDaSiamRPN> TrackerDaSiamRPN::create(const TrackerDaSiamRPN::Params& parameters) |
435 | { |
436 | (void)(parameters); |
437 | CV_Error(cv::Error::StsNotImplemented, "to use GOTURN, the tracking module needs to be built with opencv_dnn !" ); |
438 | } |
#endif  // HAVE_OPENCV_DNN
440 | } |
441 | |