1// This file is part of OpenCV project.
2// It is subject to the license terms in the LICENSE file found in the top-level directory
3// of this distribution and at http://opencv.org/license.html.
4
5#include "../precomp.hpp"
6
7#ifdef HAVE_OPENCV_DNN
8#include "opencv2/dnn.hpp"
9#endif
10
11namespace cv {
12
13TrackerDaSiamRPN::TrackerDaSiamRPN()
14{
15 // nothing
16}
17
18TrackerDaSiamRPN::~TrackerDaSiamRPN()
19{
20 // nothing
21}
22
23TrackerDaSiamRPN::Params::Params()
24{
25 model = "dasiamrpn_model.onnx";
26 kernel_cls1 = "dasiamrpn_kernel_cls1.onnx";
27 kernel_r1 = "dasiamrpn_kernel_r1.onnx";
28#ifdef HAVE_OPENCV_DNN
29 backend = dnn::DNN_BACKEND_DEFAULT;
30 target = dnn::DNN_TARGET_CPU;
31#else
32 backend = -1; // invalid value
33 target = -1; // invalid value
34#endif
35}
36
37#ifdef HAVE_OPENCV_DNN
38
// Size measure of a w-by-h box: each side is extended by the mean of the two
// sides, and the square root of the area of the extended box is returned.
template <typename T> static
T sizeCal(const T& w, const T& h)
{
    const T margin = (w + h) * T(0.5);
    const T paddedArea = (w + margin) * (h + margin);
    return sqrt(paddedArea);
}
46
47template <>
48Mat sizeCal(const Mat& w, const Mat& h)
49{
50 Mat pad = (w + h) * 0.5;
51 Mat sz2 = (w + pad).mul(e: (h + pad));
52
53 cv::sqrt(src: sz2, dst: sz2);
54 return sz2;
55}
56
57class TrackerDaSiamRPNImpl : public TrackerDaSiamRPN
58{
59public:
60 TrackerDaSiamRPNImpl(const TrackerDaSiamRPN::Params& params)
61 {
62 siamRPN = dnn::readNet(model: params.model);
63 siamKernelCL1 = dnn::readNet(model: params.kernel_cls1);
64 siamKernelR1 = dnn::readNet(model: params.kernel_r1);
65
66 CV_Assert(!siamRPN.empty());
67 CV_Assert(!siamKernelCL1.empty());
68 CV_Assert(!siamKernelR1.empty());
69
70 siamRPN.setPreferableBackend(params.backend);
71 siamRPN.setPreferableTarget(params.target);
72 siamKernelR1.setPreferableBackend(params.backend);
73 siamKernelR1.setPreferableTarget(params.target);
74 siamKernelCL1.setPreferableBackend(params.backend);
75 siamKernelCL1.setPreferableTarget(params.target);
76 }
77
78 TrackerDaSiamRPNImpl(const dnn::Net& siam_rpn, const dnn::Net& kernel_cls1, const dnn::Net& kernel_r1)
79 {
80 CV_Assert(!siam_rpn.empty());
81 CV_Assert(!kernel_cls1.empty());
82 CV_Assert(!kernel_r1.empty());
83
84 siamRPN = siam_rpn;
85 siamKernelCL1 = kernel_cls1;
86 siamKernelR1 = kernel_r1;
87 }
88
89 void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
90 bool update(InputArray image, Rect& boundingBox) CV_OVERRIDE;
91 float getTrackingScore() CV_OVERRIDE;
92
93protected:
94 dnn::Net siamRPN, siamKernelR1, siamKernelCL1;
95 Rect boundingBox_;
96 Mat image_;
97 struct trackerConfig
98 {
99 float windowInfluence = 0.43f;
100 float lr = 0.4f;
101 int scale = 8;
102 bool swapRB = false;
103 int totalStride = 8;
104 float penaltyK = 0.055f;
105 int exemplarSize = 127;
106 int instanceSize = 271;
107 float contextAmount = 0.5f;
108 std::vector<float> ratios = { 0.33f, 0.5f, 1.0f, 2.0f, 3.0f };
109 int anchorNum = int(ratios.size());
110 Mat anchors;
111 Mat windows;
112 Scalar avgChans;
113 Size imgSize = { 0, 0 };
114 Rect2f targetBox = { 0, 0, 0, 0 };
115 int scoreSize = (instanceSize - exemplarSize) / totalStride + 1;
116 float tracking_score;
117
118 void update_scoreSize()
119 {
120 scoreSize = int((instanceSize - exemplarSize) / totalStride + 1);
121 }
122 };
123 trackerConfig trackState;
124
125 void softmax(const Mat& src, Mat& dst);
126 void elementMax(Mat& src);
127 Mat generateHanningWindow();
128 Mat generateAnchors();
129 Mat getSubwindow(Mat& img, const Rect2f& targetBox, float originalSize, Scalar avgChans);
130 void trackerInit(Mat img);
131 void trackerEval(Mat img);
132};
133
134void TrackerDaSiamRPNImpl::init(InputArray image, const Rect& boundingBox)
135{
136 image_ = image.getMat().clone();
137
138 trackState.update_scoreSize();
139 trackState.targetBox = Rect2f(
140 float(boundingBox.x) + float(boundingBox.width) * 0.5f, // FIXIT don't use center in Rect structures, it is confusing
141 float(boundingBox.y) + float(boundingBox.height) * 0.5f,
142 float(boundingBox.width),
143 float(boundingBox.height)
144 );
145 trackerInit(img: image_);
146}
147
148void TrackerDaSiamRPNImpl::trackerInit(Mat img)
149{
150 Rect2f targetBox = trackState.targetBox;
151 Mat anchors = generateAnchors();
152 trackState.anchors = anchors;
153
154 Mat windows = generateHanningWindow();
155
156 trackState.windows = windows;
157 trackState.imgSize = img.size();
158
159 trackState.avgChans = mean(src: img);
160 float wc = targetBox.width + trackState.contextAmount * (targetBox.width + targetBox.height);
161 float hc = targetBox.height + trackState.contextAmount * (targetBox.width + targetBox.height);
162 float sz = (float)cvRound(value: sqrt(x: wc * hc));
163
164 Mat zCrop = getSubwindow(img, targetBox, originalSize: sz, avgChans: trackState.avgChans);
165 Mat blob;
166
167 dnn::blobFromImage(image: zCrop, blob, scalefactor: 1.0, size: Size(trackState.exemplarSize, trackState.exemplarSize), mean: Scalar(), swapRB: trackState.swapRB, crop: false, CV_32F);
168 siamRPN.setInput(blob);
169 Mat out1;
170 siamRPN.forward(outputBlobs: out1, outputName: "onnx_node_output_0!63");
171
172 siamKernelCL1.setInput(blob: out1);
173 siamKernelR1.setInput(blob: out1);
174
175 Mat cls1 = siamKernelCL1.forward();
176 Mat r1 = siamKernelR1.forward();
177 std::vector<int> r1_shape = { 20, 256, 4, 4 }, cls1_shape = { 10, 256, 4, 4 };
178
179 siamRPN.setParam(layer: siamRPN.getLayerId(layer: "onnx_node_output_0!65"), numParam: 0, blob: r1.reshape(cn: 0, newshape: r1_shape));
180 siamRPN.setParam(layer: siamRPN.getLayerId(layer: "onnx_node_output_0!68"), numParam: 0, blob: cls1.reshape(cn: 0, newshape: cls1_shape));
181}
182
183bool TrackerDaSiamRPNImpl::update(InputArray image, Rect& boundingBox)
184{
185 image_ = image.getMat().clone();
186 trackerEval(img: image_);
187 boundingBox = {
188 int(trackState.targetBox.x - int(trackState.targetBox.width / 2)),
189 int(trackState.targetBox.y - int(trackState.targetBox.height / 2)),
190 int(trackState.targetBox.width),
191 int(trackState.targetBox.height)
192 };
193 return true;
194}
195
196void TrackerDaSiamRPNImpl::trackerEval(Mat img)
197{
198 Rect2f targetBox = trackState.targetBox;
199
200 float wc = targetBox.height + trackState.contextAmount * (targetBox.width + targetBox.height);
201 float hc = targetBox.width + trackState.contextAmount * (targetBox.width + targetBox.height);
202
203 float sz = sqrt(x: wc * hc);
204 float scaleZ = trackState.exemplarSize / sz;
205
206 float searchSize = float((trackState.instanceSize - trackState.exemplarSize) / 2);
207 float pad = searchSize / scaleZ;
208 float sx = sz + 2 * pad;
209
210 Mat xCrop = getSubwindow(img, targetBox, originalSize: (float)cvRound(value: sx), avgChans: trackState.avgChans);
211
212 Mat blob;
213 std::vector<Mat> outs;
214 std::vector<String> outNames;
215 Mat delta, score;
216 Mat sc, rc, penalty, pscore;
217
218 dnn::blobFromImage(image: xCrop, blob, scalefactor: 1.0, size: Size(trackState.instanceSize, trackState.instanceSize), mean: Scalar(), swapRB: trackState.swapRB, crop: false, CV_32F);
219
220 siamRPN.setInput(blob);
221
222 outNames = siamRPN.getUnconnectedOutLayersNames();
223 siamRPN.forward(outputBlobs: outs, outBlobNames: outNames);
224
225 delta = outs[0];
226 score = outs[1];
227
228 score = score.reshape(cn: 0, newshape: { 2, trackState.anchorNum, trackState.scoreSize, trackState.scoreSize });
229 delta = delta.reshape(cn: 0, newshape: { 4, trackState.anchorNum, trackState.scoreSize, trackState.scoreSize });
230
231 softmax(src: score, dst&: score);
232
233 targetBox.width *= scaleZ;
234 targetBox.height *= scaleZ;
235
236 score = score.row(y: 1);
237 score = score.reshape(cn: 0, newshape: { 5, 19, 19 });
238
239 // Post processing
240 delta.row(y: 0) = delta.row(y: 0).mul(m: trackState.anchors.row(y: 2)) + trackState.anchors.row(y: 0);
241 delta.row(y: 1) = delta.row(y: 1).mul(m: trackState.anchors.row(y: 3)) + trackState.anchors.row(y: 1);
242 exp(src: delta.row(y: 2), dst: delta.row(y: 2));
243 delta.row(y: 2) = delta.row(y: 2).mul(m: trackState.anchors.row(y: 2));
244 exp(src: delta.row(y: 3), dst: delta.row(y: 3));
245 delta.row(y: 3) = delta.row(y: 3).mul(m: trackState.anchors.row(y: 3));
246
247 sc = sizeCal(w: delta.row(y: 2), h: delta.row(y: 3)) / sizeCal(w: targetBox.width, h: targetBox.height);
248 elementMax(src&: sc);
249
250 rc = delta.row(y: 2).mul(m: 1 / delta.row(y: 3));
251 rc = (targetBox.width / targetBox.height) / rc;
252 elementMax(src&: rc);
253
254 // Calculating the penalty
255 exp(src: ((rc.mul(m: sc) - 1.) * trackState.penaltyK * (-1.0)), dst: penalty);
256 penalty = penalty.reshape(cn: 0, newshape: { trackState.anchorNum, trackState.scoreSize, trackState.scoreSize });
257
258 pscore = penalty.mul(m: score);
259 pscore = pscore * (1.0 - trackState.windowInfluence) + trackState.windows * trackState.windowInfluence;
260
261 int bestID[2] = { 0, 0 };
262 // Find the index of best score.
263 minMaxIdx(src: pscore.reshape(cn: 0, newshape: { trackState.anchorNum * trackState.scoreSize * trackState.scoreSize, 1 }), minVal: 0, maxVal: 0, minIdx: 0, maxIdx: bestID);
264 delta = delta.reshape(cn: 0, newshape: { 4, trackState.anchorNum * trackState.scoreSize * trackState.scoreSize });
265 penalty = penalty.reshape(cn: 0, newshape: { trackState.anchorNum * trackState.scoreSize * trackState.scoreSize, 1 });
266 score = score.reshape(cn: 0, newshape: { trackState.anchorNum * trackState.scoreSize * trackState.scoreSize, 1 });
267
268 int index[2] = { 0, bestID[0] };
269 Rect2f resBox = { 0, 0, 0, 0 };
270
271 resBox.x = delta.at<float>(idx: index) / scaleZ;
272 index[0] = 1;
273 resBox.y = delta.at<float>(idx: index) / scaleZ;
274 index[0] = 2;
275 resBox.width = delta.at<float>(idx: index) / scaleZ;
276 index[0] = 3;
277 resBox.height = delta.at<float>(idx: index) / scaleZ;
278
279 float lr = penalty.at<float>(idx: bestID) * score.at<float>(idx: bestID) * trackState.lr;
280
281 resBox.x = resBox.x + targetBox.x;
282 resBox.y = resBox.y + targetBox.y;
283 targetBox.width /= scaleZ;
284 targetBox.height /= scaleZ;
285
286 resBox.width = targetBox.width * (1 - lr) + resBox.width * lr;
287 resBox.height = targetBox.height * (1 - lr) + resBox.height * lr;
288
289 resBox.x = float(fmax(x: 0., y: fmin(x: float(trackState.imgSize.width), y: resBox.x)));
290 resBox.y = float(fmax(x: 0., y: fmin(x: float(trackState.imgSize.height), y: resBox.y)));
291 resBox.width = float(fmax(x: 10., y: fmin(x: float(trackState.imgSize.width), y: resBox.width)));
292 resBox.height = float(fmax(x: 10., y: fmin(x: float(trackState.imgSize.height), y: resBox.height)));
293
294 trackState.targetBox = resBox;
295 trackState.tracking_score = score.at<float>(idx: bestID);
296}
297
298float TrackerDaSiamRPNImpl::getTrackingScore()
299{
300 return trackState.tracking_score;
301}
302
303void TrackerDaSiamRPNImpl::softmax(const Mat& src, Mat& dst)
304{
305 Mat maxVal;
306 cv::max(src1: src.row(y: 1), src2: src.row(y: 0), dst&: maxVal);
307
308 src.row(y: 1) -= maxVal;
309 src.row(y: 0) -= maxVal;
310
311 exp(src, dst);
312
313 Mat sumVal = dst.row(y: 0) + dst.row(y: 1);
314 dst.row(y: 0) = dst.row(y: 0) / sumVal;
315 dst.row(y: 1) = dst.row(y: 1) / sumVal;
316}
317
318void TrackerDaSiamRPNImpl::elementMax(Mat& src)
319{
320 int* p = src.size.p;
321 int index[4] = { 0, 0, 0, 0 };
322 for (int n = 0; n < *p; n++)
323 {
324 for (int k = 0; k < *(p + 1); k++)
325 {
326 for (int i = 0; i < *(p + 2); i++)
327 {
328 for (int j = 0; j < *(p + 3); j++)
329 {
330 index[0] = n, index[1] = k, index[2] = i, index[3] = j;
331 float& v = src.at<float>(idx: index);
332 v = fmax(x: v, y: 1.0f / v);
333 }
334 }
335 }
336 }
337}
338
339Mat TrackerDaSiamRPNImpl::generateHanningWindow()
340{
341 Mat baseWindows, HanningWindows;
342
343 createHanningWindow(dst: baseWindows, winSize: Size(trackState.scoreSize, trackState.scoreSize), CV_32F);
344 baseWindows = baseWindows.reshape(cn: 0, newshape: { 1, trackState.scoreSize, trackState.scoreSize });
345 HanningWindows = baseWindows.clone();
346 for (int i = 1; i < trackState.anchorNum; i++)
347 {
348 HanningWindows.push_back(m: baseWindows);
349 }
350
351 return HanningWindows;
352}
353
354Mat TrackerDaSiamRPNImpl::generateAnchors()
355{
356 int totalStride = trackState.totalStride, scales = trackState.scale, scoreSize = trackState.scoreSize;
357 std::vector<float> ratios = trackState.ratios;
358 std::vector<Rect2f> baseAnchors;
359 int anchorNum = int(ratios.size());
360 int size = totalStride * totalStride;
361
362 float ori = -(float(scoreSize / 2)) * float(totalStride);
363
364 for (auto i = 0; i < anchorNum; i++)
365 {
366 int ws = int(sqrt(x: size / ratios[i]));
367 int hs = int(ws * ratios[i]);
368
369 float wws = float(ws) * scales;
370 float hhs = float(hs) * scales;
371 Rect2f anchor = { 0, 0, wws, hhs };
372 baseAnchors.push_back(x: anchor);
373 }
374
375 int anchorIndex[4] = { 0, 0, 0, 0 };
376 const int sizes[4] = { 4, (int)ratios.size(), scoreSize, scoreSize };
377 Mat anchors(4, sizes, CV_32F);
378
379 for (auto i = 0; i < scoreSize; i++)
380 {
381 for (auto j = 0; j < scoreSize; j++)
382 {
383 for (auto k = 0; k < anchorNum; k++)
384 {
385 anchorIndex[0] = 1, anchorIndex[1] = k, anchorIndex[2] = i, anchorIndex[3] = j;
386 anchors.at<float>(idx: anchorIndex) = ori + totalStride * i;
387
388 anchorIndex[0] = 0;
389 anchors.at<float>(idx: anchorIndex) = ori + totalStride * j;
390
391 anchorIndex[0] = 2;
392 anchors.at<float>(idx: anchorIndex) = baseAnchors[k].width;
393
394 anchorIndex[0] = 3;
395 anchors.at<float>(idx: anchorIndex) = baseAnchors[k].height;
396 }
397 }
398 }
399
400 return anchors;
401}
402
403Mat TrackerDaSiamRPNImpl::getSubwindow(Mat& img, const Rect2f& targetBox, float originalSize, Scalar avgChans)
404{
405 Mat zCrop, dst;
406 Size imgSize = img.size();
407 float c = (originalSize + 1) / 2;
408 float xMin = (float)cvRound(value: targetBox.x - c);
409 float xMax = xMin + originalSize - 1;
410 float yMin = (float)cvRound(value: targetBox.y - c);
411 float yMax = yMin + originalSize - 1;
412
413 int leftPad = (int)(fmax(x: 0., y: -xMin));
414 int topPad = (int)(fmax(x: 0., y: -yMin));
415 int rightPad = (int)(fmax(x: 0., y: xMax - imgSize.width + 1));
416 int bottomPad = (int)(fmax(x: 0., y: yMax - imgSize.height + 1));
417
418 xMin = xMin + leftPad;
419 xMax = xMax + leftPad;
420 yMax = yMax + topPad;
421 yMin = yMin + topPad;
422
423 if (topPad == 0 && bottomPad == 0 && leftPad == 0 && rightPad == 0)
424 {
425 img(Rect(int(xMin), int(yMin), int(xMax - xMin + 1), int(yMax - yMin + 1))).copyTo(m: zCrop);
426 }
427 else
428 {
429 copyMakeBorder(src: img, dst, top: topPad, bottom: bottomPad, left: leftPad, right: rightPad, borderType: BORDER_CONSTANT, value: avgChans);
430 dst(Rect(int(xMin), int(yMin), int(xMax - xMin + 1), int(yMax - yMin + 1))).copyTo(m: zCrop);
431 }
432
433 return zCrop;
434}
435
436Ptr<TrackerDaSiamRPN> TrackerDaSiamRPN::create(const TrackerDaSiamRPN::Params& parameters)
437{
438 return makePtr<TrackerDaSiamRPNImpl>(a1: parameters);
439}
440
441Ptr<TrackerDaSiamRPN> TrackerDaSiamRPN::create(const dnn::Net& siam_rpn, const dnn::Net& kernel_cls1, const dnn::Net& kernel_r1)
442{
443 return makePtr<TrackerDaSiamRPNImpl>(a1: siam_rpn, a1: kernel_cls1, a1: kernel_r1);
444}
445
#else // HAVE_OPENCV_DNN
447Ptr<TrackerDaSiamRPN> TrackerDaSiamRPN::create(const TrackerDaSiamRPN::Params& parameters)
448{
449 (void)(parameters);
450 CV_Error(cv::Error::StsNotImplemented, "to use DaSiamRPN, the tracking module needs to be built with opencv_dnn !");
451}
#endif // HAVE_OPENCV_DNN
453}
454

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of opencv/modules/video/src/tracking/tracker_dasiamrpn.cpp