// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// This file is modified from the https://github.com/HonglinChu/NanoTrack/blob/master/ncnn_macos_nanotrack/nanotrack.cpp
// Author, HongLinChu, 1628464345@qq.com
// Adapt to OpenCV, ZihaoMu: zihaomu@outlook.com

// Link to original inference code: https://github.com/HonglinChu/NanoTrack
// Link to original training repo: https://github.com/HonglinChu/SiamTrackers/tree/master/NanoTrack
11
12#include "../precomp.hpp"
13#ifdef HAVE_OPENCV_DNN
14#include "opencv2/dnn.hpp"
15#endif
16
17namespace cv {
18
19TrackerNano::TrackerNano()
20{
21 // nothing
22}
23
24TrackerNano::~TrackerNano()
25{
26 // nothing
27}
28
29TrackerNano::Params::Params()
30{
31 backbone = "backbone.onnx";
32 neckhead = "neckhead.onnx";
33#ifdef HAVE_OPENCV_DNN
34 backend = dnn::DNN_BACKEND_DEFAULT;
35 target = dnn::DNN_TARGET_CPU;
36#else
37 backend = -1; // invalid value
38 target = -1; // invalid value
39#endif
40}
41
42#ifdef HAVE_OPENCV_DNN
43static void softmax(const Mat& src, Mat& dst)
44{
45 Mat maxVal;
46 cv::max(src1: src.row(y: 1), src2: src.row(y: 0), dst&: maxVal);
47
48 src.row(y: 1) -= maxVal;
49 src.row(y: 0) -= maxVal;
50
51 exp(src, dst);
52
53 Mat sumVal = dst.row(y: 0) + dst.row(y: 1);
54 dst.row(y: 0) = dst.row(y: 0) / sumVal;
55 dst.row(y: 1) = dst.row(y: 1) / sumVal;
56}
57
// Scalar size measure of a w x h box: sqrt((w + p) * (h + p)), p = (w + h) / 2.
static float sizeCal(float w, float h)
{
    const float margin = (w + h) * 0.5f;
    const float paddedArea = (w + margin) * (h + margin);
    return std::sqrt(paddedArea);
}
64
65static Mat sizeCal(const Mat& w, const Mat& h)
66{
67 Mat pad = (w + h) * 0.5;
68 Mat sz2 = (w + pad).mul(e: (h + pad));
69
70 cv::sqrt(src: sz2, dst: sz2);
71 return sz2;
72}
73
74// Similar python code: r = np.maximum(r, 1. / r) # r is matrix
75static void elementReciprocalMax(Mat& srcDst)
76{
77 size_t totalV = srcDst.total();
78 float* ptr = srcDst.ptr<float>(y: 0);
79 for (size_t i = 0; i < totalV; i++)
80 {
81 float val = *(ptr + i);
82 *(ptr + i) = std::max(a: val, b: 1.0f/val);
83 }
84}
85
86class TrackerNanoImpl : public TrackerNano
87{
88public:
89 TrackerNanoImpl(const TrackerNano::Params& parameters)
90 {
91 backbone = dnn::readNet(model: parameters.backbone);
92 neckhead = dnn::readNet(model: parameters.neckhead);
93
94 CV_Assert(!backbone.empty());
95 CV_Assert(!neckhead.empty());
96
97 backbone.setPreferableBackend(parameters.backend);
98 backbone.setPreferableTarget(parameters.target);
99 neckhead.setPreferableBackend(parameters.backend);
100 neckhead.setPreferableTarget(parameters.target);
101 }
102
103 TrackerNanoImpl(const dnn::Net& _backbone, const dnn::Net& _neckhead)
104 {
105 CV_Assert(!_backbone.empty());
106 CV_Assert(!_neckhead.empty());
107
108 backbone = _backbone;
109 neckhead = _neckhead;
110 }
111
112 void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
113 bool update(InputArray image, Rect& boundingBox) CV_OVERRIDE;
114 float getTrackingScore() CV_OVERRIDE;
115
116 // Save the target bounding box for each frame.
117 std::vector<float> targetSz = {0, 0}; // H and W of bounding box
118 std::vector<float> targetPos = {0, 0}; // center point of bounding box (x, y)
119 float tracking_score;
120
121 struct trackerConfig
122 {
123 float windowInfluence = 0.455f;
124 float lr = 0.37f;
125 float contextAmount = 0.5;
126 bool swapRB = true;
127 int totalStride = 16;
128 float penaltyK = 0.055f;
129 };
130
131protected:
132 const int exemplarSize = 127;
133 const int instanceSize = 255;
134
135 trackerConfig trackState;
136 int scoreSize;
137 Size imgSize = {0, 0};
138 Mat hanningWindow;
139 Mat grid2searchX, grid2searchY;
140
141 dnn::Net backbone, neckhead;
142 Mat image;
143
144 void getSubwindow(Mat& dstCrop, Mat& srcImg, int originalSz, int resizeSz);
145 void generateGrids();
146};
147
148void TrackerNanoImpl::generateGrids()
149{
150 int sz = scoreSize;
151 const int sz2 = sz / 2;
152
153 std::vector<float> x1Vec(sz, 0);
154
155 for (int i = 0; i < sz; i++)
156 {
157 x1Vec[i] = (float)(i - sz2);
158 }
159
160 Mat x1M(1, sz, CV_32FC1, x1Vec.data());
161
162 cv::repeat(src: x1M, ny: sz, nx: 1, dst: grid2searchX);
163 cv::repeat(src: x1M.t(), ny: 1, nx: sz, dst: grid2searchY);
164
165 grid2searchX *= trackState.totalStride;
166 grid2searchY *= trackState.totalStride;
167
168 grid2searchX += instanceSize/2;
169 grid2searchY += instanceSize/2;
170}
171
172void TrackerNanoImpl::init(InputArray image_, const Rect &boundingBox_)
173{
174 scoreSize = (instanceSize - exemplarSize) / trackState.totalStride + 8;
175 trackState = trackerConfig();
176 image = image_.getMat().clone();
177
178 // convert Rect2d from left-up to center.
179 targetPos[0] = float(boundingBox_.x) + float(boundingBox_.width) * 0.5f;
180 targetPos[1] = float(boundingBox_.y) + float(boundingBox_.height) * 0.5f;
181
182 targetSz[0] = float(boundingBox_.width);
183 targetSz[1] = float(boundingBox_.height);
184
185 imgSize = image.size();
186
187 // Extent the bounding box.
188 float sumSz = targetSz[0] + targetSz[1];
189 float wExtent = targetSz[0] + trackState.contextAmount * (sumSz);
190 float hExtent = targetSz[1] + trackState.contextAmount * (sumSz);
191 int sz = int(cv::sqrt(x: wExtent * hExtent));
192
193 Mat crop;
194 getSubwindow(dstCrop&: crop, srcImg&: image, originalSz: sz, resizeSz: exemplarSize);
195 Mat blob = dnn::blobFromImage(image: crop, scalefactor: 1.0, size: Size(), mean: Scalar(), swapRB: trackState.swapRB);
196
197 backbone.setInput(blob);
198 Mat out = backbone.forward(); // Feature extraction.
199 neckhead.setInput(blob: out, name: "input1");
200
201 createHanningWindow(dst: hanningWindow, winSize: Size(scoreSize, scoreSize), CV_32F);
202 generateGrids();
203}
204
205void TrackerNanoImpl::getSubwindow(Mat& dstCrop, Mat& srcImg, int originalSz, int resizeSz)
206{
207 Scalar avgChans = mean(src: srcImg);
208 Size imgSz = srcImg.size();
209 int c = (originalSz + 1) / 2;
210
211 int context_xmin = (int)(targetPos[0]) - c;
212 int context_xmax = context_xmin + originalSz - 1;
213 int context_ymin = (int)(targetPos[1]) - c;
214 int context_ymax = context_ymin + originalSz - 1;
215
216 int left_pad = std::max(a: 0, b: -context_xmin);
217 int top_pad = std::max(a: 0, b: -context_ymin);
218 int right_pad = std::max(a: 0, b: context_xmax - imgSz.width + 1);
219 int bottom_pad = std::max(a: 0, b: context_ymax - imgSz.height + 1);
220
221 context_xmin += left_pad;
222 context_xmax += left_pad;
223 context_ymin += top_pad;
224 context_ymax += top_pad;
225
226 Mat cropImg;
227 if (left_pad == 0 && top_pad == 0 && right_pad == 0 && bottom_pad == 0)
228 {
229 // Crop image without padding.
230 cropImg = srcImg(cv::Rect(context_xmin, context_ymin,
231 context_xmax - context_xmin + 1, context_ymax - context_ymin + 1));
232 }
233 else // Crop image with padding, and the padding value is avgChans
234 {
235 cv::Mat tmpMat;
236 cv::copyMakeBorder(src: srcImg, dst: tmpMat, top: top_pad, bottom: bottom_pad, left: left_pad, right: right_pad, borderType: cv::BORDER_CONSTANT, value: avgChans);
237 cropImg = tmpMat(cv::Rect(context_xmin, context_ymin, context_xmax - context_xmin + 1, context_ymax - context_ymin + 1));
238 }
239 resize(src: cropImg, dst: dstCrop, dsize: Size(resizeSz, resizeSz));
240}
241
242bool TrackerNanoImpl::update(InputArray image_, Rect &boundingBoxRes)
243{
244 image = image_.getMat().clone();
245 int targetSzSum = (int)(targetSz[0] + targetSz[1]);
246
247 float wc = targetSz[0] + trackState.contextAmount * targetSzSum;
248 float hc = targetSz[1] + trackState.contextAmount * targetSzSum;
249 float sz = cv::sqrt(x: wc * hc);
250 float scale_z = exemplarSize / sz;
251 float sx = sz * (instanceSize / exemplarSize);
252 targetSz[0] *= scale_z;
253 targetSz[1] *= scale_z;
254
255 Mat crop;
256 getSubwindow(dstCrop&: crop, srcImg&: image, originalSz: int(sx), resizeSz: instanceSize);
257
258 Mat blob = dnn::blobFromImage(image: crop, scalefactor: 1.0, size: Size(), mean: Scalar(), swapRB: trackState.swapRB);
259 backbone.setInput(blob);
260 Mat xf = backbone.forward();
261 neckhead.setInput(blob: xf, name: "input2");
262 std::vector<String> outputName = {"output1", "output2"};
263 std::vector<Mat> outs;
264 neckhead.forward(outputBlobs: outs, outBlobNames: outputName);
265
266 CV_Assert(outs.size() == 2);
267
268 Mat clsScore = outs[0]; // 1x2x16x16
269 Mat bboxPred = outs[1]; // 1x4x16x16
270
271 clsScore = clsScore.reshape(cn: 0, newshape: {2, scoreSize, scoreSize});
272 bboxPred = bboxPred.reshape(cn: 0, newshape: {4, scoreSize, scoreSize});
273
274 Mat scoreSoftmax; // 2x16x16
275 softmax(src: clsScore, dst&: scoreSoftmax);
276
277 Mat score = scoreSoftmax.row(y: 1);
278 score = score.reshape(cn: 0, newshape: {scoreSize, scoreSize});
279
280 Mat predX1 = grid2searchX - bboxPred.row(y: 0).reshape(cn: 0, newshape: {scoreSize, scoreSize});
281 Mat predY1 = grid2searchY - bboxPred.row(y: 1).reshape(cn: 0, newshape: {scoreSize, scoreSize});
282 Mat predX2 = grid2searchX + bboxPred.row(y: 2).reshape(cn: 0, newshape: {scoreSize, scoreSize});
283 Mat predY2 = grid2searchY + bboxPred.row(y: 3).reshape(cn: 0, newshape: {scoreSize, scoreSize});
284
285 // size penalty
286 // scale penalty
287 Mat sc = sizeCal(w: predX2 - predX1, h: predY2 - predY1)/sizeCal(w: targetPos[0], h: targetPos[1]);
288 elementReciprocalMax(srcDst&: sc);
289
290 // ratio penalty
291 float ratioVal = targetSz[0] / targetSz[1];
292
293 Mat ratioM(scoreSize, scoreSize, CV_32FC1, Scalar::all(v0: ratioVal));
294 Mat rc = ratioM / ((predX2 - predX1) / (predY2 - predY1));
295 elementReciprocalMax(srcDst&: rc);
296
297 Mat penalty;
298 exp(src: ((rc.mul(m: sc) - 1) * trackState.penaltyK * (-1)), dst: penalty);
299 Mat pscore = penalty.mul(m: score);
300
301 // Window penalty
302 pscore = pscore * (1.0 - trackState.windowInfluence) + hanningWindow * trackState.windowInfluence;
303
304 // get Max
305 int bestID[2] = { 0, 0 };
306 minMaxIdx(src: pscore, minVal: 0, maxVal: 0, minIdx: 0, maxIdx: bestID);
307
308 tracking_score = pscore.at<float>(idx: bestID);
309
310 float x1Val = predX1.at<float>(idx: bestID);
311 float x2Val = predX2.at<float>(idx: bestID);
312 float y1Val = predY1.at<float>(idx: bestID);
313 float y2Val = predY2.at<float>(idx: bestID);
314
315 float predXs = (x1Val + x2Val)/2;
316 float predYs = (y1Val + y2Val)/2;
317 float predW = (x2Val - x1Val)/scale_z;
318 float predH = (y2Val - y1Val)/scale_z;
319
320 float diffXs = (predXs - instanceSize / 2) / scale_z;
321 float diffYs = (predYs - instanceSize / 2) / scale_z;
322
323 targetSz[0] /= scale_z;
324 targetSz[1] /= scale_z;
325
326 float lr = penalty.at<float>(idx: bestID) * score.at<float>(idx: bestID) * trackState.lr;
327
328 float resX = targetPos[0] + diffXs;
329 float resY = targetPos[1] + diffYs;
330 float resW = predW * lr + (1 - lr) * targetSz[0];
331 float resH = predH * lr + (1 - lr) * targetSz[1];
332
333 resX = std::max(a: 0.f, b: std::min(a: (float)imgSize.width, b: resX));
334 resY = std::max(a: 0.f, b: std::min(a: (float)imgSize.height, b: resY));
335 resW = std::max(a: 10.f, b: std::min(a: (float)imgSize.width, b: resW));
336 resH = std::max(a: 10.f, b: std::min(a: (float)imgSize.height, b: resH));
337
338 targetPos[0] = resX;
339 targetPos[1] = resY;
340 targetSz[0] = resW;
341 targetSz[1] = resH;
342
343 // convert center to Rect.
344 boundingBoxRes = { int(resX - resW/2), int(resY - resH/2), int(resW), int(resH)};
345 return true;
346}
347
348float TrackerNanoImpl::getTrackingScore()
349{
350 return tracking_score;
351}
352
353Ptr<TrackerNano> TrackerNano::create(const TrackerNano::Params& parameters)
354{
355 return makePtr<TrackerNanoImpl>(a1: parameters);
356}
357
358Ptr<TrackerNano> TrackerNano::create(const dnn::Net& backbone, const dnn::Net& neckhead)
359{
360 return makePtr<TrackerNanoImpl>(a1: backbone, a1: neckhead);
361}
362
363#else // OPENCV_HAVE_DNN
364Ptr<TrackerNano> TrackerNano::create(const TrackerNano::Params& parameters)
365{
366 CV_UNUSED(parameters);
367 CV_Error(cv::Error::StsNotImplemented, "to use NanoTrack, the tracking module needs to be built with opencv_dnn !");
368}
369#endif // OPENCV_HAVE_DNN
370}
371

// Source: opencv/modules/video/src/tracking/tracker_nano.cpp