1// This file is part of OpenCV project.
2// It is subject to the license terms in the LICENSE file found in the top-level directory
3// of this distribution and at http://opencv.org/license.html.
4
5#include "../precomp.hpp"
6
7#ifdef HAVE_OPENCV_DNN
8#include "opencv2/dnn.hpp"
9#endif
10
11namespace cv {
12
13TrackerDaSiamRPN::TrackerDaSiamRPN()
14{
15 // nothing
16}
17
18TrackerDaSiamRPN::~TrackerDaSiamRPN()
19{
20 // nothing
21}
22
23TrackerDaSiamRPN::Params::Params()
24{
25 model = "dasiamrpn_model.onnx";
26 kernel_cls1 = "dasiamrpn_kernel_cls1.onnx";
27 kernel_r1 = "dasiamrpn_kernel_r1.onnx";
28#ifdef HAVE_OPENCV_DNN
29 backend = dnn::DNN_BACKEND_DEFAULT;
30 target = dnn::DNN_TARGET_CPU;
31#else
32 backend = -1; // invalid value
33 target = -1; // invalid value
34#endif
35}
36
37#ifdef HAVE_OPENCV_DNN
38
// Returns sqrt((w + p) * (h + p)) where p = (w + h) / 2.
// Generic (scalar) version; a Mat specialization handles element-wise evaluation.
template <typename T> static
T sizeCal(const T& w, const T& h)
{
    const T halfSum = (w + h) * T(0.5);
    const T paddedArea = (w + halfSum) * (h + halfSum);
    return sqrt(paddedArea);
}
46
47template <>
48Mat sizeCal(const Mat& w, const Mat& h)
49{
50 Mat pad = (w + h) * 0.5;
51 Mat sz2 = (w + pad).mul(e: (h + pad));
52
53 cv::sqrt(src: sz2, dst: sz2);
54 return sz2;
55}
56
57class TrackerDaSiamRPNImpl : public TrackerDaSiamRPN
58{
59public:
60 TrackerDaSiamRPNImpl(const TrackerDaSiamRPN::Params& parameters)
61 : params(parameters)
62 {
63
64 siamRPN = dnn::readNet(model: params.model);
65 siamKernelCL1 = dnn::readNet(model: params.kernel_cls1);
66 siamKernelR1 = dnn::readNet(model: params.kernel_r1);
67
68 CV_Assert(!siamRPN.empty());
69 CV_Assert(!siamKernelCL1.empty());
70 CV_Assert(!siamKernelR1.empty());
71
72 siamRPN.setPreferableBackend(params.backend);
73 siamRPN.setPreferableTarget(params.target);
74 siamKernelR1.setPreferableBackend(params.backend);
75 siamKernelR1.setPreferableTarget(params.target);
76 siamKernelCL1.setPreferableBackend(params.backend);
77 siamKernelCL1.setPreferableTarget(params.target);
78 }
79
80 void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
81 bool update(InputArray image, Rect& boundingBox) CV_OVERRIDE;
82 float getTrackingScore() CV_OVERRIDE;
83
84 TrackerDaSiamRPN::Params params;
85
86protected:
87 dnn::Net siamRPN, siamKernelR1, siamKernelCL1;
88 Rect boundingBox_;
89 Mat image_;
90 struct trackerConfig
91 {
92 float windowInfluence = 0.43f;
93 float lr = 0.4f;
94 int scale = 8;
95 bool swapRB = false;
96 int totalStride = 8;
97 float penaltyK = 0.055f;
98 int exemplarSize = 127;
99 int instanceSize = 271;
100 float contextAmount = 0.5f;
101 std::vector<float> ratios = { 0.33f, 0.5f, 1.0f, 2.0f, 3.0f };
102 int anchorNum = int(ratios.size());
103 Mat anchors;
104 Mat windows;
105 Scalar avgChans;
106 Size imgSize = { 0, 0 };
107 Rect2f targetBox = { 0, 0, 0, 0 };
108 int scoreSize = (instanceSize - exemplarSize) / totalStride + 1;
109 float tracking_score;
110
111 void update_scoreSize()
112 {
113 scoreSize = int((instanceSize - exemplarSize) / totalStride + 1);
114 }
115 };
116 trackerConfig trackState;
117
118 void softmax(const Mat& src, Mat& dst);
119 void elementMax(Mat& src);
120 Mat generateHanningWindow();
121 Mat generateAnchors();
122 Mat getSubwindow(Mat& img, const Rect2f& targetBox, float originalSize, Scalar avgChans);
123 void trackerInit(Mat img);
124 void trackerEval(Mat img);
125};
126
127void TrackerDaSiamRPNImpl::init(InputArray image, const Rect& boundingBox)
128{
129 image_ = image.getMat().clone();
130
131 trackState.update_scoreSize();
132 trackState.targetBox = Rect2f(
133 float(boundingBox.x) + float(boundingBox.width) * 0.5f, // FIXIT don't use center in Rect structures, it is confusing
134 float(boundingBox.y) + float(boundingBox.height) * 0.5f,
135 float(boundingBox.width),
136 float(boundingBox.height)
137 );
138 trackerInit(img: image_);
139}
140
141void TrackerDaSiamRPNImpl::trackerInit(Mat img)
142{
143 Rect2f targetBox = trackState.targetBox;
144 Mat anchors = generateAnchors();
145 trackState.anchors = anchors;
146
147 Mat windows = generateHanningWindow();
148
149 trackState.windows = windows;
150 trackState.imgSize = img.size();
151
152 trackState.avgChans = mean(src: img);
153 float wc = targetBox.width + trackState.contextAmount * (targetBox.width + targetBox.height);
154 float hc = targetBox.height + trackState.contextAmount * (targetBox.width + targetBox.height);
155 float sz = (float)cvRound(value: sqrt(x: wc * hc));
156
157 Mat zCrop = getSubwindow(img, targetBox, originalSize: sz, avgChans: trackState.avgChans);
158 Mat blob;
159
160 dnn::blobFromImage(image: zCrop, blob, scalefactor: 1.0, size: Size(trackState.exemplarSize, trackState.exemplarSize), mean: Scalar(), swapRB: trackState.swapRB, crop: false, CV_32F);
161 siamRPN.setInput(blob);
162 Mat out1;
163 siamRPN.forward(outputBlobs: out1, outputName: "onnx_node_output_0!63");
164
165 siamKernelCL1.setInput(blob: out1);
166 siamKernelR1.setInput(blob: out1);
167
168 Mat cls1 = siamKernelCL1.forward();
169 Mat r1 = siamKernelR1.forward();
170 std::vector<int> r1_shape = { 20, 256, 4, 4 }, cls1_shape = { 10, 256, 4, 4 };
171
172 siamRPN.setParam(layer: siamRPN.getLayerId(layer: "onnx_node_output_0!65"), numParam: 0, blob: r1.reshape(cn: 0, newshape: r1_shape));
173 siamRPN.setParam(layer: siamRPN.getLayerId(layer: "onnx_node_output_0!68"), numParam: 0, blob: cls1.reshape(cn: 0, newshape: cls1_shape));
174}
175
176bool TrackerDaSiamRPNImpl::update(InputArray image, Rect& boundingBox)
177{
178 image_ = image.getMat().clone();
179 trackerEval(img: image_);
180 boundingBox = {
181 int(trackState.targetBox.x - int(trackState.targetBox.width / 2)),
182 int(trackState.targetBox.y - int(trackState.targetBox.height / 2)),
183 int(trackState.targetBox.width),
184 int(trackState.targetBox.height)
185 };
186 return true;
187}
188
189void TrackerDaSiamRPNImpl::trackerEval(Mat img)
190{
191 Rect2f targetBox = trackState.targetBox;
192
193 float wc = targetBox.height + trackState.contextAmount * (targetBox.width + targetBox.height);
194 float hc = targetBox.width + trackState.contextAmount * (targetBox.width + targetBox.height);
195
196 float sz = sqrt(x: wc * hc);
197 float scaleZ = trackState.exemplarSize / sz;
198
199 float searchSize = float((trackState.instanceSize - trackState.exemplarSize) / 2);
200 float pad = searchSize / scaleZ;
201 float sx = sz + 2 * pad;
202
203 Mat xCrop = getSubwindow(img, targetBox, originalSize: (float)cvRound(value: sx), avgChans: trackState.avgChans);
204
205 Mat blob;
206 std::vector<Mat> outs;
207 std::vector<String> outNames;
208 Mat delta, score;
209 Mat sc, rc, penalty, pscore;
210
211 dnn::blobFromImage(image: xCrop, blob, scalefactor: 1.0, size: Size(trackState.instanceSize, trackState.instanceSize), mean: Scalar(), swapRB: trackState.swapRB, crop: false, CV_32F);
212
213 siamRPN.setInput(blob);
214
215 outNames = siamRPN.getUnconnectedOutLayersNames();
216 siamRPN.forward(outputBlobs: outs, outBlobNames: outNames);
217
218 delta = outs[0];
219 score = outs[1];
220
221 score = score.reshape(cn: 0, newshape: { 2, trackState.anchorNum, trackState.scoreSize, trackState.scoreSize });
222 delta = delta.reshape(cn: 0, newshape: { 4, trackState.anchorNum, trackState.scoreSize, trackState.scoreSize });
223
224 softmax(src: score, dst&: score);
225
226 targetBox.width *= scaleZ;
227 targetBox.height *= scaleZ;
228
229 score = score.row(y: 1);
230 score = score.reshape(cn: 0, newshape: { 5, 19, 19 });
231
232 // Post processing
233 delta.row(y: 0) = delta.row(y: 0).mul(m: trackState.anchors.row(y: 2)) + trackState.anchors.row(y: 0);
234 delta.row(y: 1) = delta.row(y: 1).mul(m: trackState.anchors.row(y: 3)) + trackState.anchors.row(y: 1);
235 exp(src: delta.row(y: 2), dst: delta.row(y: 2));
236 delta.row(y: 2) = delta.row(y: 2).mul(m: trackState.anchors.row(y: 2));
237 exp(src: delta.row(y: 3), dst: delta.row(y: 3));
238 delta.row(y: 3) = delta.row(y: 3).mul(m: trackState.anchors.row(y: 3));
239
240 sc = sizeCal(w: delta.row(y: 2), h: delta.row(y: 3)) / sizeCal(w: targetBox.width, h: targetBox.height);
241 elementMax(src&: sc);
242
243 rc = delta.row(y: 2).mul(m: 1 / delta.row(y: 3));
244 rc = (targetBox.width / targetBox.height) / rc;
245 elementMax(src&: rc);
246
247 // Calculating the penalty
248 exp(src: ((rc.mul(m: sc) - 1.) * trackState.penaltyK * (-1.0)), dst: penalty);
249 penalty = penalty.reshape(cn: 0, newshape: { trackState.anchorNum, trackState.scoreSize, trackState.scoreSize });
250
251 pscore = penalty.mul(m: score);
252 pscore = pscore * (1.0 - trackState.windowInfluence) + trackState.windows * trackState.windowInfluence;
253
254 int bestID[2] = { 0, 0 };
255 // Find the index of best score.
256 minMaxIdx(src: pscore.reshape(cn: 0, newshape: { trackState.anchorNum * trackState.scoreSize * trackState.scoreSize, 1 }), minVal: 0, maxVal: 0, minIdx: 0, maxIdx: bestID);
257 delta = delta.reshape(cn: 0, newshape: { 4, trackState.anchorNum * trackState.scoreSize * trackState.scoreSize });
258 penalty = penalty.reshape(cn: 0, newshape: { trackState.anchorNum * trackState.scoreSize * trackState.scoreSize, 1 });
259 score = score.reshape(cn: 0, newshape: { trackState.anchorNum * trackState.scoreSize * trackState.scoreSize, 1 });
260
261 int index[2] = { 0, bestID[0] };
262 Rect2f resBox = { 0, 0, 0, 0 };
263
264 resBox.x = delta.at<float>(idx: index) / scaleZ;
265 index[0] = 1;
266 resBox.y = delta.at<float>(idx: index) / scaleZ;
267 index[0] = 2;
268 resBox.width = delta.at<float>(idx: index) / scaleZ;
269 index[0] = 3;
270 resBox.height = delta.at<float>(idx: index) / scaleZ;
271
272 float lr = penalty.at<float>(idx: bestID) * score.at<float>(idx: bestID) * trackState.lr;
273
274 resBox.x = resBox.x + targetBox.x;
275 resBox.y = resBox.y + targetBox.y;
276 targetBox.width /= scaleZ;
277 targetBox.height /= scaleZ;
278
279 resBox.width = targetBox.width * (1 - lr) + resBox.width * lr;
280 resBox.height = targetBox.height * (1 - lr) + resBox.height * lr;
281
282 resBox.x = float(fmax(x: 0., y: fmin(x: float(trackState.imgSize.width), y: resBox.x)));
283 resBox.y = float(fmax(x: 0., y: fmin(x: float(trackState.imgSize.height), y: resBox.y)));
284 resBox.width = float(fmax(x: 10., y: fmin(x: float(trackState.imgSize.width), y: resBox.width)));
285 resBox.height = float(fmax(x: 10., y: fmin(x: float(trackState.imgSize.height), y: resBox.height)));
286
287 trackState.targetBox = resBox;
288 trackState.tracking_score = score.at<float>(idx: bestID);
289}
290
291float TrackerDaSiamRPNImpl::getTrackingScore()
292{
293 return trackState.tracking_score;
294}
295
296void TrackerDaSiamRPNImpl::softmax(const Mat& src, Mat& dst)
297{
298 Mat maxVal;
299 cv::max(src1: src.row(y: 1), src2: src.row(y: 0), dst&: maxVal);
300
301 src.row(y: 1) -= maxVal;
302 src.row(y: 0) -= maxVal;
303
304 exp(src, dst);
305
306 Mat sumVal = dst.row(y: 0) + dst.row(y: 1);
307 dst.row(y: 0) = dst.row(y: 0) / sumVal;
308 dst.row(y: 1) = dst.row(y: 1) / sumVal;
309}
310
311void TrackerDaSiamRPNImpl::elementMax(Mat& src)
312{
313 int* p = src.size.p;
314 int index[4] = { 0, 0, 0, 0 };
315 for (int n = 0; n < *p; n++)
316 {
317 for (int k = 0; k < *(p + 1); k++)
318 {
319 for (int i = 0; i < *(p + 2); i++)
320 {
321 for (int j = 0; j < *(p + 3); j++)
322 {
323 index[0] = n, index[1] = k, index[2] = i, index[3] = j;
324 float& v = src.at<float>(idx: index);
325 v = fmax(x: v, y: 1.0f / v);
326 }
327 }
328 }
329 }
330}
331
332Mat TrackerDaSiamRPNImpl::generateHanningWindow()
333{
334 Mat baseWindows, HanningWindows;
335
336 createHanningWindow(dst: baseWindows, winSize: Size(trackState.scoreSize, trackState.scoreSize), CV_32F);
337 baseWindows = baseWindows.reshape(cn: 0, newshape: { 1, trackState.scoreSize, trackState.scoreSize });
338 HanningWindows = baseWindows.clone();
339 for (int i = 1; i < trackState.anchorNum; i++)
340 {
341 HanningWindows.push_back(m: baseWindows);
342 }
343
344 return HanningWindows;
345}
346
347Mat TrackerDaSiamRPNImpl::generateAnchors()
348{
349 int totalStride = trackState.totalStride, scales = trackState.scale, scoreSize = trackState.scoreSize;
350 std::vector<float> ratios = trackState.ratios;
351 std::vector<Rect2f> baseAnchors;
352 int anchorNum = int(ratios.size());
353 int size = totalStride * totalStride;
354
355 float ori = -(float(scoreSize / 2)) * float(totalStride);
356
357 for (auto i = 0; i < anchorNum; i++)
358 {
359 int ws = int(sqrt(x: size / ratios[i]));
360 int hs = int(ws * ratios[i]);
361
362 float wws = float(ws) * scales;
363 float hhs = float(hs) * scales;
364 Rect2f anchor = { 0, 0, wws, hhs };
365 baseAnchors.push_back(x: anchor);
366 }
367
368 int anchorIndex[4] = { 0, 0, 0, 0 };
369 const int sizes[4] = { 4, (int)ratios.size(), scoreSize, scoreSize };
370 Mat anchors(4, sizes, CV_32F);
371
372 for (auto i = 0; i < scoreSize; i++)
373 {
374 for (auto j = 0; j < scoreSize; j++)
375 {
376 for (auto k = 0; k < anchorNum; k++)
377 {
378 anchorIndex[0] = 1, anchorIndex[1] = k, anchorIndex[2] = i, anchorIndex[3] = j;
379 anchors.at<float>(idx: anchorIndex) = ori + totalStride * i;
380
381 anchorIndex[0] = 0;
382 anchors.at<float>(idx: anchorIndex) = ori + totalStride * j;
383
384 anchorIndex[0] = 2;
385 anchors.at<float>(idx: anchorIndex) = baseAnchors[k].width;
386
387 anchorIndex[0] = 3;
388 anchors.at<float>(idx: anchorIndex) = baseAnchors[k].height;
389 }
390 }
391 }
392
393 return anchors;
394}
395
396Mat TrackerDaSiamRPNImpl::getSubwindow(Mat& img, const Rect2f& targetBox, float originalSize, Scalar avgChans)
397{
398 Mat zCrop, dst;
399 Size imgSize = img.size();
400 float c = (originalSize + 1) / 2;
401 float xMin = (float)cvRound(value: targetBox.x - c);
402 float xMax = xMin + originalSize - 1;
403 float yMin = (float)cvRound(value: targetBox.y - c);
404 float yMax = yMin + originalSize - 1;
405
406 int leftPad = (int)(fmax(x: 0., y: -xMin));
407 int topPad = (int)(fmax(x: 0., y: -yMin));
408 int rightPad = (int)(fmax(x: 0., y: xMax - imgSize.width + 1));
409 int bottomPad = (int)(fmax(x: 0., y: yMax - imgSize.height + 1));
410
411 xMin = xMin + leftPad;
412 xMax = xMax + leftPad;
413 yMax = yMax + topPad;
414 yMin = yMin + topPad;
415
416 if (topPad == 0 && bottomPad == 0 && leftPad == 0 && rightPad == 0)
417 {
418 img(Rect(int(xMin), int(yMin), int(xMax - xMin + 1), int(yMax - yMin + 1))).copyTo(m: zCrop);
419 }
420 else
421 {
422 copyMakeBorder(src: img, dst, top: topPad, bottom: bottomPad, left: leftPad, right: rightPad, borderType: BORDER_CONSTANT, value: avgChans);
423 dst(Rect(int(xMin), int(yMin), int(xMax - xMin + 1), int(yMax - yMin + 1))).copyTo(m: zCrop);
424 }
425
426 return zCrop;
427}
428Ptr<TrackerDaSiamRPN> TrackerDaSiamRPN::create(const TrackerDaSiamRPN::Params& parameters)
429{
430 return makePtr<TrackerDaSiamRPNImpl>(a1: parameters);
431}
432
#else  // HAVE_OPENCV_DNN
434Ptr<TrackerDaSiamRPN> TrackerDaSiamRPN::create(const TrackerDaSiamRPN::Params& parameters)
435{
436 (void)(parameters);
437 CV_Error(cv::Error::StsNotImplemented, "to use GOTURN, the tracking module needs to be built with opencv_dnn !");
438}
#endif  // HAVE_OPENCV_DNN
440}
441

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of opencv/modules/video/src/tracking/tracker_dasiamrpn.cpp