// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// This file is modified from the https://github.com/HonglinChu/NanoTrack/blob/master/ncnn_macos_nanotrack/nanotrack.cpp
// Author, HongLinChu, 1628464345@qq.com
// Adapt to OpenCV, ZihaoMu: zihaomu@outlook.com

// Link to original inference code: https://github.com/HonglinChu/NanoTrack
// Link to original training repo: https://github.com/HonglinChu/SiamTrackers/tree/master/NanoTrack

#include "../precomp.hpp"
#ifdef HAVE_OPENCV_DNN
#include "opencv2/dnn.hpp"
#endif

namespace cv {

19TrackerNano::TrackerNano()
20{
21 // nothing
22}
23
24TrackerNano::~TrackerNano()
25{
26 // nothing
27}
28
29TrackerNano::Params::Params()
30{
31 backbone = "backbone.onnx";
32 neckhead = "neckhead.onnx";
33#ifdef HAVE_OPENCV_DNN
34 backend = dnn::DNN_BACKEND_DEFAULT;
35 target = dnn::DNN_TARGET_CPU;
36#else
37 backend = -1; // invalid value
38 target = -1; // invalid value
39#endif
40}
41
42#ifdef HAVE_OPENCV_DNN
43static void softmax(const Mat& src, Mat& dst)
44{
45 Mat maxVal;
46 cv::max(src1: src.row(y: 1), src2: src.row(y: 0), dst&: maxVal);
47
48 src.row(y: 1) -= maxVal;
49 src.row(y: 0) -= maxVal;
50
51 exp(src, dst);
52
53 Mat sumVal = dst.row(y: 0) + dst.row(y: 1);
54 dst.row(y: 0) = dst.row(y: 0) / sumVal;
55 dst.row(y: 1) = dst.row(y: 1) / sumVal;
56}
57
// Context-aware size measure used by SiamFC-style trackers:
// sqrt((w + p) * (h + p)) with context padding p = (w + h) / 2.
static float sizeCal(float w, float h)
{
    const float context = 0.5f * (w + h);
    return std::sqrt((w + context) * (h + context));
}

65static Mat sizeCal(const Mat& w, const Mat& h)
66{
67 Mat pad = (w + h) * 0.5;
68 Mat sz2 = (w + pad).mul(e: (h + pad));
69
70 cv::sqrt(src: sz2, dst: sz2);
71 return sz2;
72}
73
74// Similar python code: r = np.maximum(r, 1. / r) # r is matrix
75static void elementReciprocalMax(Mat& srcDst)
76{
77 size_t totalV = srcDst.total();
78 float* ptr = srcDst.ptr<float>(y: 0);
79 for (size_t i = 0; i < totalV; i++)
80 {
81 float val = *(ptr + i);
82 *(ptr + i) = std::max(a: val, b: 1.0f/val);
83 }
84}
85
86class TrackerNanoImpl : public TrackerNano
87{
88public:
89 TrackerNanoImpl(const TrackerNano::Params& parameters)
90 : params(parameters)
91 {
92 backbone = dnn::readNet(model: params.backbone);
93 neckhead = dnn::readNet(model: params.neckhead);
94
95 CV_Assert(!backbone.empty());
96 CV_Assert(!neckhead.empty());
97
98 backbone.setPreferableBackend(params.backend);
99 backbone.setPreferableTarget(params.target);
100 neckhead.setPreferableBackend(params.backend);
101 neckhead.setPreferableTarget(params.target);
102 }
103
104 void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
105 bool update(InputArray image, Rect& boundingBox) CV_OVERRIDE;
106 float getTrackingScore() CV_OVERRIDE;
107
108 // Save the target bounding box for each frame.
109 std::vector<float> targetSz = {0, 0}; // H and W of bounding box
110 std::vector<float> targetPos = {0, 0}; // center point of bounding box (x, y)
111 float tracking_score;
112
113 TrackerNano::Params params;
114
115 struct trackerConfig
116 {
117 float windowInfluence = 0.455f;
118 float lr = 0.37f;
119 float contextAmount = 0.5;
120 bool swapRB = true;
121 int totalStride = 16;
122 float penaltyK = 0.055f;
123 };
124
125protected:
126 const int exemplarSize = 127;
127 const int instanceSize = 255;
128
129 trackerConfig trackState;
130 int scoreSize;
131 Size imgSize = {0, 0};
132 Mat hanningWindow;
133 Mat grid2searchX, grid2searchY;
134
135 dnn::Net backbone, neckhead;
136 Mat image;
137
138 void getSubwindow(Mat& dstCrop, Mat& srcImg, int originalSz, int resizeSz);
139 void generateGrids();
140};
141
142void TrackerNanoImpl::generateGrids()
143{
144 int sz = scoreSize;
145 const int sz2 = sz / 2;
146
147 std::vector<float> x1Vec(sz, 0);
148
149 for (int i = 0; i < sz; i++)
150 {
151 x1Vec[i] = (float)(i - sz2);
152 }
153
154 Mat x1M(1, sz, CV_32FC1, x1Vec.data());
155
156 cv::repeat(src: x1M, ny: sz, nx: 1, dst: grid2searchX);
157 cv::repeat(src: x1M.t(), ny: 1, nx: sz, dst: grid2searchY);
158
159 grid2searchX *= trackState.totalStride;
160 grid2searchY *= trackState.totalStride;
161
162 grid2searchX += instanceSize/2;
163 grid2searchY += instanceSize/2;
164}
165
// Initializes the tracker from the first frame and the user-supplied box:
// stores the target centre/size, extracts the exemplar (template) features
// with the backbone, and binds them to the neck/head input "input1", where
// they remain fixed for all subsequent update() calls.
void TrackerNanoImpl::init(InputArray image_, const Rect &boundingBox_)
{
    // Score-map side length; with the defaults: (255 - 127) / 16 + 8 = 16.
    // NOTE(review): computed from trackState *before* it is reset on the next
    // line — harmless only because trackerConfig's defaults are identical.
    scoreSize = (instanceSize - exemplarSize) / trackState.totalStride + 8;
    trackState = trackerConfig();
    image = image_.getMat().clone();

    // convert Rect2d from left-up to center.
    targetPos[0] = float(boundingBox_.x) + float(boundingBox_.width) * 0.5f;
    targetPos[1] = float(boundingBox_.y) + float(boundingBox_.height) * 0.5f;

    targetSz[0] = float(boundingBox_.width);
    targetSz[1] = float(boundingBox_.height);

    imgSize = image.size();

    // Extent the bounding box: pad by contextAmount of (w + h), then take the
    // square root of the padded area as the exemplar crop size.
    float sumSz = targetSz[0] + targetSz[1];
    float wExtent = targetSz[0] + trackState.contextAmount * (sumSz);
    float hExtent = targetSz[1] + trackState.contextAmount * (sumSz);
    int sz = int(cv::sqrt(wExtent * hExtent));

    Mat crop;
    getSubwindow(crop, image, sz, exemplarSize);
    Mat blob = dnn::blobFromImage(crop, 1.0, Size(), Scalar(), trackState.swapRB);

    backbone.setInput(blob);
    Mat out = backbone.forward(); // Feature extraction.
    neckhead.setInput(out, "input1");

    // Cosine window used in update() to penalize large displacements.
    createHanningWindow(hanningWindow, Size(scoreSize, scoreSize), CV_32F);
    generateGrids();
}

// Crops a square patch of side originalSz centred on targetPos from srcImg,
// padding with the per-channel image mean where the patch extends outside the
// image, then resizes it to resizeSz x resizeSz into dstCrop.
void TrackerNanoImpl::getSubwindow(Mat& dstCrop, Mat& srcImg, int originalSz, int resizeSz)
{
    Scalar avgChans = mean(srcImg); // padding colour
    Size imgSz = srcImg.size();
    int c = (originalSz + 1) / 2;   // half-size, rounded up

    // Inclusive patch bounds around the (truncated) target centre.
    int context_xmin = (int)(targetPos[0]) - c;
    int context_xmax = context_xmin + originalSz - 1;
    int context_ymin = (int)(targetPos[1]) - c;
    int context_ymax = context_ymin + originalSz - 1;

    // How far the patch extends beyond the image on each side.
    int left_pad = std::max(0, -context_xmin);
    int top_pad = std::max(0, -context_ymin);
    int right_pad = std::max(0, context_xmax - imgSz.width + 1);
    int bottom_pad = std::max(0, context_ymax - imgSz.height + 1);

    // Shift the crop window into the coordinate frame of the padded image.
    context_xmin += left_pad;
    context_xmax += left_pad;
    context_ymin += top_pad;
    context_ymax += top_pad;

    Mat cropImg;
    if (left_pad == 0 && top_pad == 0 && right_pad == 0 && bottom_pad == 0)
    {
        // Crop image without padding.
        cropImg = srcImg(cv::Rect(context_xmin, context_ymin,
                        context_xmax - context_xmin + 1, context_ymax - context_ymin + 1));
    }
    else // Crop image with padding, and the padding value is avgChans
    {
        cv::Mat tmpMat;
        cv::copyMakeBorder(srcImg, tmpMat, top_pad, bottom_pad, left_pad, right_pad, cv::BORDER_CONSTANT, avgChans);
        cropImg = tmpMat(cv::Rect(context_xmin, context_ymin, context_xmax - context_xmin + 1, context_ymax - context_ymin + 1));
    }
    resize(cropImg, dstCrop, Size(resizeSz, resizeSz));
}

// Tracks the target into a new frame: crops a search region around the last
// known position, runs backbone + neck/head, decodes the score and box maps,
// applies scale/ratio/cosine-window penalties, and updates the stored target
// state. Always returns true; the confidence of the chosen peak is exposed
// via getTrackingScore().
bool TrackerNanoImpl::update(InputArray image_, Rect &boundingBoxRes)
{
    image = image_.getMat().clone();
    int targetSzSum = (int)(targetSz[0] + targetSz[1]);

    // Context-padded target size (same formula as in init()).
    float wc = targetSz[0] + trackState.contextAmount * targetSzSum;
    float hc = targetSz[1] + trackState.contextAmount * targetSzSum;
    float sz = cv::sqrt(wc * hc);
    float scale_z = exemplarSize / sz;
    // NOTE(review): instanceSize / exemplarSize is integer division
    // (255 / 127 == 2); the reference Python implementation uses the float
    // ratio (~2.008) — confirm this truncation is intended.
    float sx = sz * (instanceSize / exemplarSize);
    // Work in the exemplar-scaled coordinate frame below; undone further down.
    targetSz[0] *= scale_z;
    targetSz[1] *= scale_z;

    Mat crop;
    getSubwindow(crop, image, int(sx), instanceSize);

    Mat blob = dnn::blobFromImage(crop, 1.0, Size(), Scalar(), trackState.swapRB);
    backbone.setInput(blob);
    Mat xf = backbone.forward();
    // "input1" (the exemplar features) was bound once in init(); only the
    // per-frame search features go to "input2".
    neckhead.setInput(xf, "input2");
    std::vector<String> outputName = {"output1", "output2"};
    std::vector<Mat> outs;
    neckhead.forward(outs, outputName);

    CV_Assert(outs.size() == 2);

    Mat clsScore = outs[0]; // 1x2x16x16
    Mat bboxPred = outs[1]; // 1x4x16x16

    clsScore = clsScore.reshape(0, {2, scoreSize, scoreSize});
    bboxPred = bboxPred.reshape(0, {4, scoreSize, scoreSize});

    Mat scoreSoftmax; // 2x16x16
    softmax(clsScore, scoreSoftmax);

    // Row 1 of the softmax is the foreground probability map.
    Mat score = scoreSoftmax.row(1);
    score = score.reshape(0, {scoreSize, scoreSize});

    // Decode predicted box corners: the four channels are offsets relative to
    // the grid coordinates precomputed in generateGrids().
    Mat predX1 = grid2searchX - bboxPred.row(0).reshape(0, {scoreSize, scoreSize});
    Mat predY1 = grid2searchY - bboxPred.row(1).reshape(0, {scoreSize, scoreSize});
    Mat predX2 = grid2searchX + bboxPred.row(2).reshape(0, {scoreSize, scoreSize});
    Mat predY2 = grid2searchY + bboxPred.row(3).reshape(0, {scoreSize, scoreSize});

    // size penalty
    // scale penalty
    // NOTE(review): the denominator uses targetPos (the centre coordinates);
    // the reference implementation penalizes against the target *size*
    // (sizeCal(targetSz[0], targetSz[1])) — verify against the original repo.
    Mat sc = sizeCal(predX2 - predX1, predY2 - predY1)/sizeCal(targetPos[0], targetPos[1]);
    elementReciprocalMax(sc);

    // ratio penalty
    float ratioVal = targetSz[0] / targetSz[1];

    Mat ratioM(scoreSize, scoreSize, CV_32FC1, Scalar::all(ratioVal));
    Mat rc = ratioM / ((predX2 - predX1) / (predY2 - predY1));
    elementReciprocalMax(rc);

    // Combined penalty: exp(-k * (rc * sc - 1)).
    Mat penalty;
    exp(((rc.mul(sc) - 1) * trackState.penaltyK * (-1)), penalty);
    Mat pscore = penalty.mul(score);

    // Window penalty
    pscore = pscore * (1.0 - trackState.windowInfluence) + hanningWindow * trackState.windowInfluence;

    // get Max
    int bestID[2] = { 0, 0 };
    minMaxIdx(pscore, 0, 0, 0, bestID);

    tracking_score = pscore.at<float>(bestID);

    float x1Val = predX1.at<float>(bestID);
    float x2Val = predX2.at<float>(bestID);
    float y1Val = predY1.at<float>(bestID);
    float y2Val = predY2.at<float>(bestID);

    // Corners -> centre/size; divide by scale_z to return to image scale.
    float predXs = (x1Val + x2Val)/2;
    float predYs = (y1Val + y2Val)/2;
    float predW = (x2Val - x1Val)/scale_z;
    float predH = (y2Val - y1Val)/scale_z;

    // Displacement of the peak from the search-region centre, in image scale.
    float diffXs = (predXs - instanceSize / 2) / scale_z;
    float diffYs = (predYs - instanceSize / 2) / scale_z;

    // Undo the scale_z applied to targetSz at the top of this function.
    targetSz[0] /= scale_z;
    targetSz[1] /= scale_z;

    // Confidence-weighted learning rate for the exponential size update.
    float lr = penalty.at<float>(bestID) * score.at<float>(bestID) * trackState.lr;

    float resX = targetPos[0] + diffXs;
    float resY = targetPos[1] + diffYs;
    float resW = predW * lr + (1 - lr) * targetSz[0];
    float resH = predH * lr + (1 - lr) * targetSz[1];

    // Clamp the centre into the image and the size to [10, image dimension].
    resX = std::max(0.f, std::min((float)imgSize.width, resX));
    resY = std::max(0.f, std::min((float)imgSize.height, resY));
    resW = std::max(10.f, std::min((float)imgSize.width, resW));
    resH = std::max(10.f, std::min((float)imgSize.height, resH));

    targetPos[0] = resX;
    targetPos[1] = resY;
    targetSz[0] = resW;
    targetSz[1] = resH;

    // convert center to Rect.
    boundingBoxRes = { int(resX - resW/2), int(resY - resH/2), int(resW), int(resH)};
    return true;
}

// Returns the confidence (penalized, window-weighted score at the chosen
// peak) recorded by the most recent update() call.
float TrackerNanoImpl::getTrackingScore()
{
    return tracking_score;
}

// Factory: builds the DNN-backed NanoTrack implementation from the given
// parameters (model paths, backend, target).
Ptr<TrackerNano> TrackerNano::create(const TrackerNano::Params& parameters)
{
    return makePtr<TrackerNanoImpl>(parameters);
}

#else // OPENCV_HAVE_DNN
// Stub used when the tracking module is built without opencv_dnn: creating a
// NanoTrack tracker is a hard error in that configuration.
Ptr<TrackerNano> TrackerNano::create(const TrackerNano::Params& parameters)
{
    CV_UNUSED(parameters);
    CV_Error(cv::Error::StsNotImplemented, "to use NanoTrack, the tracking module needs to be built with opencv_dnn !");
}
#endif // OPENCV_HAVE_DNN
}  // namespace cv


// NOTE: trailing profiler-UI text (KDAB/hotspot chrome) removed; this file is
// opencv/modules/video/src/tracking/tracker_nano.cpp.