1 | // This file is part of OpenCV project. |
2 | // It is subject to the license terms in the LICENSE file found in the top-level directory |
3 | // of this distribution and at http://opencv.org/license.html. |
4 | |
5 | // This file is modified from the https://github.com/HonglinChu/NanoTrack/blob/master/ncnn_macos_nanotrack/nanotrack.cpp |
6 | // Author, HongLinChu, 1628464345@qq.com |
7 | // Adapt to OpenCV, ZihaoMu: zihaomu@outlook.com |
8 | |
9 | // Link to original inference code: https://github.com/HonglinChu/NanoTrack |
10 | // Link to original training repo: https://github.com/HonglinChu/SiamTrackers/tree/master/NanoTrack |
11 | |
12 | #include "../precomp.hpp" |
13 | #ifdef HAVE_OPENCV_DNN |
14 | #include "opencv2/dnn.hpp" |
15 | #endif |
16 | |
17 | namespace cv { |
18 | |
19 | TrackerNano::TrackerNano() |
20 | { |
21 | // nothing |
22 | } |
23 | |
24 | TrackerNano::~TrackerNano() |
25 | { |
26 | // nothing |
27 | } |
28 | |
TrackerNano::Params::Params()
{
    // Default ONNX model file names; callers normally override these with
    // full paths to the downloaded NanoTrack backbone/neck-head models.
    backbone = "backbone.onnx";
    neckhead = "neckhead.onnx";
#ifdef HAVE_OPENCV_DNN
    backend = dnn::DNN_BACKEND_DEFAULT;
    target = dnn::DNN_TARGET_CPU;
#else
    backend = -1; // invalid value
    target = -1; // invalid value
#endif
}
41 | |
42 | #ifdef HAVE_OPENCV_DNN |
43 | static void softmax(const Mat& src, Mat& dst) |
44 | { |
45 | Mat maxVal; |
46 | cv::max(src1: src.row(y: 1), src2: src.row(y: 0), dst&: maxVal); |
47 | |
48 | src.row(y: 1) -= maxVal; |
49 | src.row(y: 0) -= maxVal; |
50 | |
51 | exp(src, dst); |
52 | |
53 | Mat sumVal = dst.row(y: 0) + dst.row(y: 1); |
54 | dst.row(y: 0) = dst.row(y: 0) / sumVal; |
55 | dst.row(y: 1) = dst.row(y: 1) / sumVal; |
56 | } |
57 | |
// Size measure used by the SiamFC-style penalty: the geometric mean of the
// box sides after adding half-perimeter context padding.
static float sizeCal(float w, float h)
{
    const float context = 0.5f * (w + h);
    return std::sqrt((w + context) * (h + context));
}
64 | |
65 | static Mat sizeCal(const Mat& w, const Mat& h) |
66 | { |
67 | Mat pad = (w + h) * 0.5; |
68 | Mat sz2 = (w + pad).mul(e: (h + pad)); |
69 | |
70 | cv::sqrt(src: sz2, dst: sz2); |
71 | return sz2; |
72 | } |
73 | |
74 | // Similar python code: r = np.maximum(r, 1. / r) # r is matrix |
75 | static void elementReciprocalMax(Mat& srcDst) |
76 | { |
77 | size_t totalV = srcDst.total(); |
78 | float* ptr = srcDst.ptr<float>(y: 0); |
79 | for (size_t i = 0; i < totalV; i++) |
80 | { |
81 | float val = *(ptr + i); |
82 | *(ptr + i) = std::max(a: val, b: 1.0f/val); |
83 | } |
84 | } |
85 | |
86 | class TrackerNanoImpl : public TrackerNano |
87 | { |
88 | public: |
89 | TrackerNanoImpl(const TrackerNano::Params& parameters) |
90 | : params(parameters) |
91 | { |
92 | backbone = dnn::readNet(model: params.backbone); |
93 | neckhead = dnn::readNet(model: params.neckhead); |
94 | |
95 | CV_Assert(!backbone.empty()); |
96 | CV_Assert(!neckhead.empty()); |
97 | |
98 | backbone.setPreferableBackend(params.backend); |
99 | backbone.setPreferableTarget(params.target); |
100 | neckhead.setPreferableBackend(params.backend); |
101 | neckhead.setPreferableTarget(params.target); |
102 | } |
103 | |
104 | void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE; |
105 | bool update(InputArray image, Rect& boundingBox) CV_OVERRIDE; |
106 | float getTrackingScore() CV_OVERRIDE; |
107 | |
108 | // Save the target bounding box for each frame. |
109 | std::vector<float> targetSz = {0, 0}; // H and W of bounding box |
110 | std::vector<float> targetPos = {0, 0}; // center point of bounding box (x, y) |
111 | float tracking_score; |
112 | |
113 | TrackerNano::Params params; |
114 | |
115 | struct trackerConfig |
116 | { |
117 | float windowInfluence = 0.455f; |
118 | float lr = 0.37f; |
119 | float contextAmount = 0.5; |
120 | bool swapRB = true; |
121 | int totalStride = 16; |
122 | float penaltyK = 0.055f; |
123 | }; |
124 | |
125 | protected: |
126 | const int exemplarSize = 127; |
127 | const int instanceSize = 255; |
128 | |
129 | trackerConfig trackState; |
130 | int scoreSize; |
131 | Size imgSize = {0, 0}; |
132 | Mat hanningWindow; |
133 | Mat grid2searchX, grid2searchY; |
134 | |
135 | dnn::Net backbone, neckhead; |
136 | Mat image; |
137 | |
138 | void getSubwindow(Mat& dstCrop, Mat& srcImg, int originalSz, int resizeSz); |
139 | void generateGrids(); |
140 | }; |
141 | |
142 | void TrackerNanoImpl::generateGrids() |
143 | { |
144 | int sz = scoreSize; |
145 | const int sz2 = sz / 2; |
146 | |
147 | std::vector<float> x1Vec(sz, 0); |
148 | |
149 | for (int i = 0; i < sz; i++) |
150 | { |
151 | x1Vec[i] = (float)(i - sz2); |
152 | } |
153 | |
154 | Mat x1M(1, sz, CV_32FC1, x1Vec.data()); |
155 | |
156 | cv::repeat(src: x1M, ny: sz, nx: 1, dst: grid2searchX); |
157 | cv::repeat(src: x1M.t(), ny: 1, nx: sz, dst: grid2searchY); |
158 | |
159 | grid2searchX *= trackState.totalStride; |
160 | grid2searchY *= trackState.totalStride; |
161 | |
162 | grid2searchX += instanceSize/2; |
163 | grid2searchY += instanceSize/2; |
164 | } |
165 | |
166 | void TrackerNanoImpl::init(InputArray image_, const Rect &boundingBox_) |
167 | { |
168 | scoreSize = (instanceSize - exemplarSize) / trackState.totalStride + 8; |
169 | trackState = trackerConfig(); |
170 | image = image_.getMat().clone(); |
171 | |
172 | // convert Rect2d from left-up to center. |
173 | targetPos[0] = float(boundingBox_.x) + float(boundingBox_.width) * 0.5f; |
174 | targetPos[1] = float(boundingBox_.y) + float(boundingBox_.height) * 0.5f; |
175 | |
176 | targetSz[0] = float(boundingBox_.width); |
177 | targetSz[1] = float(boundingBox_.height); |
178 | |
179 | imgSize = image.size(); |
180 | |
181 | // Extent the bounding box. |
182 | float sumSz = targetSz[0] + targetSz[1]; |
183 | float wExtent = targetSz[0] + trackState.contextAmount * (sumSz); |
184 | float hExtent = targetSz[1] + trackState.contextAmount * (sumSz); |
185 | int sz = int(cv::sqrt(x: wExtent * hExtent)); |
186 | |
187 | Mat crop; |
188 | getSubwindow(dstCrop&: crop, srcImg&: image, originalSz: sz, resizeSz: exemplarSize); |
189 | Mat blob = dnn::blobFromImage(image: crop, scalefactor: 1.0, size: Size(), mean: Scalar(), swapRB: trackState.swapRB); |
190 | |
191 | backbone.setInput(blob); |
192 | Mat out = backbone.forward(); // Feature extraction. |
193 | neckhead.setInput(blob: out, name: "input1" ); |
194 | |
195 | createHanningWindow(dst: hanningWindow, winSize: Size(scoreSize, scoreSize), CV_32F); |
196 | generateGrids(); |
197 | } |
198 | |
// Crops a square window of side 'originalSz' centered on targetPos from
// srcImg, then resizes it to resizeSz x resizeSz into dstCrop. Parts of the
// window falling outside the image are padded with the mean image color.
void TrackerNanoImpl::getSubwindow(Mat& dstCrop, Mat& srcImg, int originalSz, int resizeSz)
{
    Scalar avgChans = mean(srcImg);
    Size imgSz = srcImg.size();
    int c = (originalSz + 1) / 2;

    // Inclusive crop bounds in source-image coordinates.
    int context_xmin = (int)(targetPos[0]) - c;
    int context_xmax = context_xmin + originalSz - 1;
    int context_ymin = (int)(targetPos[1]) - c;
    int context_ymax = context_ymin + originalSz - 1;

    // How far the crop sticks out past each image edge (0 if fully inside).
    int left_pad = std::max(0, -context_xmin);
    int top_pad = std::max(0, -context_ymin);
    int right_pad = std::max(0, context_xmax - imgSz.width + 1);
    int bottom_pad = std::max(0, context_ymax - imgSz.height + 1);

    // Shift the bounds into the coordinate frame of the padded image.
    context_xmin += left_pad;
    context_xmax += left_pad;
    context_ymin += top_pad;
    context_ymax += top_pad;

    Mat cropImg;
    if (left_pad == 0 && top_pad == 0 && right_pad == 0 && bottom_pad == 0)
    {
        // Crop image without padding.
        cropImg = srcImg(cv::Rect(context_xmin, context_ymin,
                        context_xmax - context_xmin + 1, context_ymax - context_ymin + 1));
    }
    else // Crop image with padding, and the padding value is avgChans
    {
        cv::Mat tmpMat;
        cv::copyMakeBorder(srcImg, tmpMat, top_pad, bottom_pad, left_pad, right_pad, cv::BORDER_CONSTANT, avgChans);
        cropImg = tmpMat(cv::Rect(context_xmin, context_ymin, context_xmax - context_xmin + 1, context_ymax - context_ymin + 1));
    }
    resize(cropImg, dstCrop, Size(resizeSz, resizeSz));
}
235 | |
236 | bool TrackerNanoImpl::update(InputArray image_, Rect &boundingBoxRes) |
237 | { |
238 | image = image_.getMat().clone(); |
239 | int targetSzSum = (int)(targetSz[0] + targetSz[1]); |
240 | |
241 | float wc = targetSz[0] + trackState.contextAmount * targetSzSum; |
242 | float hc = targetSz[1] + trackState.contextAmount * targetSzSum; |
243 | float sz = cv::sqrt(x: wc * hc); |
244 | float scale_z = exemplarSize / sz; |
245 | float sx = sz * (instanceSize / exemplarSize); |
246 | targetSz[0] *= scale_z; |
247 | targetSz[1] *= scale_z; |
248 | |
249 | Mat crop; |
250 | getSubwindow(dstCrop&: crop, srcImg&: image, originalSz: int(sx), resizeSz: instanceSize); |
251 | |
252 | Mat blob = dnn::blobFromImage(image: crop, scalefactor: 1.0, size: Size(), mean: Scalar(), swapRB: trackState.swapRB); |
253 | backbone.setInput(blob); |
254 | Mat xf = backbone.forward(); |
255 | neckhead.setInput(blob: xf, name: "input2" ); |
256 | std::vector<String> outputName = {"output1" , "output2" }; |
257 | std::vector<Mat> outs; |
258 | neckhead.forward(outputBlobs: outs, outBlobNames: outputName); |
259 | |
260 | CV_Assert(outs.size() == 2); |
261 | |
262 | Mat clsScore = outs[0]; // 1x2x16x16 |
263 | Mat bboxPred = outs[1]; // 1x4x16x16 |
264 | |
265 | clsScore = clsScore.reshape(cn: 0, newshape: {2, scoreSize, scoreSize}); |
266 | bboxPred = bboxPred.reshape(cn: 0, newshape: {4, scoreSize, scoreSize}); |
267 | |
268 | Mat scoreSoftmax; // 2x16x16 |
269 | softmax(src: clsScore, dst&: scoreSoftmax); |
270 | |
271 | Mat score = scoreSoftmax.row(y: 1); |
272 | score = score.reshape(cn: 0, newshape: {scoreSize, scoreSize}); |
273 | |
274 | Mat predX1 = grid2searchX - bboxPred.row(y: 0).reshape(cn: 0, newshape: {scoreSize, scoreSize}); |
275 | Mat predY1 = grid2searchY - bboxPred.row(y: 1).reshape(cn: 0, newshape: {scoreSize, scoreSize}); |
276 | Mat predX2 = grid2searchX + bboxPred.row(y: 2).reshape(cn: 0, newshape: {scoreSize, scoreSize}); |
277 | Mat predY2 = grid2searchY + bboxPred.row(y: 3).reshape(cn: 0, newshape: {scoreSize, scoreSize}); |
278 | |
279 | // size penalty |
280 | // scale penalty |
281 | Mat sc = sizeCal(w: predX2 - predX1, h: predY2 - predY1)/sizeCal(w: targetPos[0], h: targetPos[1]); |
282 | elementReciprocalMax(srcDst&: sc); |
283 | |
284 | // ratio penalty |
285 | float ratioVal = targetSz[0] / targetSz[1]; |
286 | |
287 | Mat ratioM(scoreSize, scoreSize, CV_32FC1, Scalar::all(v0: ratioVal)); |
288 | Mat rc = ratioM / ((predX2 - predX1) / (predY2 - predY1)); |
289 | elementReciprocalMax(srcDst&: rc); |
290 | |
291 | Mat penalty; |
292 | exp(src: ((rc.mul(m: sc) - 1) * trackState.penaltyK * (-1)), dst: penalty); |
293 | Mat pscore = penalty.mul(m: score); |
294 | |
295 | // Window penalty |
296 | pscore = pscore * (1.0 - trackState.windowInfluence) + hanningWindow * trackState.windowInfluence; |
297 | |
298 | // get Max |
299 | int bestID[2] = { 0, 0 }; |
300 | minMaxIdx(src: pscore, minVal: 0, maxVal: 0, minIdx: 0, maxIdx: bestID); |
301 | |
302 | tracking_score = pscore.at<float>(idx: bestID); |
303 | |
304 | float x1Val = predX1.at<float>(idx: bestID); |
305 | float x2Val = predX2.at<float>(idx: bestID); |
306 | float y1Val = predY1.at<float>(idx: bestID); |
307 | float y2Val = predY2.at<float>(idx: bestID); |
308 | |
309 | float predXs = (x1Val + x2Val)/2; |
310 | float predYs = (y1Val + y2Val)/2; |
311 | float predW = (x2Val - x1Val)/scale_z; |
312 | float predH = (y2Val - y1Val)/scale_z; |
313 | |
314 | float diffXs = (predXs - instanceSize / 2) / scale_z; |
315 | float diffYs = (predYs - instanceSize / 2) / scale_z; |
316 | |
317 | targetSz[0] /= scale_z; |
318 | targetSz[1] /= scale_z; |
319 | |
320 | float lr = penalty.at<float>(idx: bestID) * score.at<float>(idx: bestID) * trackState.lr; |
321 | |
322 | float resX = targetPos[0] + diffXs; |
323 | float resY = targetPos[1] + diffYs; |
324 | float resW = predW * lr + (1 - lr) * targetSz[0]; |
325 | float resH = predH * lr + (1 - lr) * targetSz[1]; |
326 | |
327 | resX = std::max(a: 0.f, b: std::min(a: (float)imgSize.width, b: resX)); |
328 | resY = std::max(a: 0.f, b: std::min(a: (float)imgSize.height, b: resY)); |
329 | resW = std::max(a: 10.f, b: std::min(a: (float)imgSize.width, b: resW)); |
330 | resH = std::max(a: 10.f, b: std::min(a: (float)imgSize.height, b: resH)); |
331 | |
332 | targetPos[0] = resX; |
333 | targetPos[1] = resY; |
334 | targetSz[0] = resW; |
335 | targetSz[1] = resH; |
336 | |
337 | // convert center to Rect. |
338 | boundingBoxRes = { int(resX - resW/2), int(resY - resH/2), int(resW), int(resH)}; |
339 | return true; |
340 | } |
341 | |
// Returns the penalized score-map maximum recorded by the latest update().
float TrackerNanoImpl::getTrackingScore()
{
    return tracking_score;
}
346 | |
// Factory: builds the DNN-backed NanoTrack implementation.
Ptr<TrackerNano> TrackerNano::create(const TrackerNano::Params& parameters)
{
    return makePtr<TrackerNanoImpl>(parameters);
}
351 | |
352 | #else // OPENCV_HAVE_DNN |
// Fallback when OpenCV was built without the dnn module: NanoTrack cannot
// run, so fail loudly instead of returning a broken tracker.
Ptr<TrackerNano> TrackerNano::create(const TrackerNano::Params& parameters)
{
    CV_UNUSED(parameters);
    CV_Error(cv::Error::StsNotImplemented, "to use NanoTrack, the tracking module needs to be built with opencv_dnn !");
}
358 | #endif // OPENCV_HAVE_DNN |
359 | } |
360 | |