// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"

#include "net_impl.hpp"

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN


// FIXIT drop from inference API
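// Affine quantization: a real value r is approximated as r ~= sc * (q - zp),
// with q constrained to [qmin, qmax] = [-128, 127]. Requiring that rmin maps to
// qmin gives sc = (rmax - rmin) / (qmax - qmin) and zp = qmin - rmin / sc.
// Worked example: rmin = -2, rmax = 6 => sc = 8/255 ~= 0.0314,
// zp = -128 - (-2)/0.0314 ~= -64.
// Conceptually, a Quantize node then computes q = round(r/sc) + zp (saturated to
// [-128, 127]) and a Dequantize node recovers r' = sc * (q - zp).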
static
void getQuantizationParams(const Mat& src, std::vector<float>& scales, std::vector<int>& zeropoints)
{
    const int qmin = -128; // INT8_MIN
    const int qmax = 127;  // INT8_MAX

    double rmin, rmax, sc, zp;
    cv::minMaxIdx(src, &rmin, &rmax);

    // 0 must be present in the range [rmin, rmax]
    rmin = std::min(rmin, 0.0);
    rmax = std::max(rmax, 0.0);

    sc = (rmax == rmin) ? 1.0 : (rmax - rmin)/(qmax - qmin);
    zp = qmin - (rmin/sc);

    scales.push_back((float)sc);
    zeropoints.push_back((int)std::round(zp));
}

// FIXIT drop from inference API
Net Net::Impl::quantize(Net& net, InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel)
{
    // Net can be quantized only once.
    if (netWasQuantized)
        CV_Error(Error::StsBadArg, "Cannot quantize a quantized net");

    CV_CheckType(inputsDtype, inputsDtype == CV_32F || inputsDtype == CV_8S, "Input depth should be CV_32F or CV_8S");
    CV_CheckType(outputsDtype, outputsDtype == CV_32F || outputsDtype == CV_8S, "Output depth should be CV_32F or CV_8S");

    bool originalFusion = fusion;
    int prefBackend = preferableBackend;
    int prefTarget = preferableTarget;

    // Disable fusions and use the CPU backend to quantize the net
    // FIXIT: we should not modify the original network!
    setPreferableBackend(net, DNN_BACKEND_OPENCV);
    setPreferableTarget(DNN_TARGET_CPU);
    enableFusion(false);
    enableWinograd(false);

    if (calibData.isMat())
    {
        setInput(calibData.getMat(), /*name=*/"", /*scalefactor=*/1.0, /*mean=*/Scalar());
    }
    else if (calibData.isMatVector())
    {
        std::vector<Mat> calibDataVec;
        calibData.getMatVector(calibDataVec);

        std::vector<String> inpNames = netInputLayer->outNames;
        CV_CheckEQ(calibDataVec.size(), inpNames.size(), "Calibration data size should be equal to number of inputs");
        for (int i = 0; i < calibDataVec.size(); i++)
            setInput(calibDataVec[i], inpNames[i], /*scalefactor=*/1.0, /*mean=*/Scalar());
    }

    std::vector<String> outNames = getUnconnectedOutLayersNames();
    std::vector<LayerPin> pins;
    for (int i = 0; i < outNames.size(); i++)
        pins.push_back(getPinByAlias(outNames[i]));
    setUpNet(pins);
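    // setUpNet() allocates the layers' blobs and finalizes the connections for
    // the requested outputs, so each layer below can be forwarded on the
    // calibration data.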

    // Compute scales and zeropoints for all the layers
    std::vector<std::vector<float> > scales;
    std::vector<std::vector<int> > zeropoints;
    for (Impl::MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
    {
        LayerData& ld = it->second;
        if (!ld.skip)
        {
            Ptr<Layer> layer = ld.layerInstance;
            std::vector<Mat> inps(ld.inputBlobs.size());
            for (int i = 0; i < ld.inputBlobs.size(); ++i)
                inps[i] = *ld.inputBlobs[i];
            layer->forward(inps, ld.outputBlobs, ld.internals);
        }

        std::vector<float> sc;
        std::vector<int> zp;
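        // Activations with a fixed, known output range get hard-coded parameters
        // instead of calibrated ones:
        //   TanH outputs lie in [-1, 1]                -> sc = 1/128,  zp = 0
        //   Sigmoid/Softmax outputs lie in [0, 1]      -> sc = 1/256,  zp = -128
        //   LogSoftmax outputs lie in roughly [-16, 0] -> sc = 16/256, zp = 127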
        if (ld.type == "TanH")
        {
            sc.push_back(1.f/128);
            zp.push_back(0);
        }
        else if (ld.type == "Sigmoid" || ld.type == "Softmax" || ld.type == "SoftMax")
        {
            if (ld.params.get<bool>("log_softmax", false))
            {
                sc.push_back(16.f/256);
                zp.push_back(127);
            }
            else
            {
                sc.push_back(1.f/256);
                zp.push_back(-128);
            }
        }
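        // Split/Slice/Crop only route data without changing values, so every
        // output inherits the quantization parameters observed on the input blob.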
        else if (ld.type == "Split" || ld.type == "Slice" || ld.type == "Crop")
        {
            std::vector<float> inp_sc; std::vector<int> inp_zp;
            getQuantizationParams(*ld.inputBlobs[0], inp_sc, inp_zp);
            sc.assign(ld.outputBlobs.size(), inp_sc[0]);
            zp.assign(ld.outputBlobs.size(), inp_zp[0]);
        }
        else
        {
            for (int i = 0; i < ld.outputBlobs.size(); i++)
                getQuantizationParams(ld.outputBlobs[i], sc, zp);
        }
        scales.push_back(sc);
        zeropoints.push_back(zp);
    }

    // For some layers, the input and output scales/zeropoints must be equal so that rescaling of inputs
    // is not needed during quantized inference. We start from the last layer and propagate each layer's
    // output scales/zeropoints back to its inputs.
    // TODO: Need a different approach. The current solution fails when two such layers share the same input layer.
    for (Impl::MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
    {
        LayerData& ld = it->second;
        // Layers with multiple outputs, where the number of outputs equals the number of inputs:
        // input i must keep the same scale/zeropoint as output i.
        if (ld.type == "Blank" || ld.type == "Dropout" || ld.type == "Identity" || ld.type == "Silence" ||
            ld.type == "Flatten" || ld.type == "Padding" || ld.type == "Permute" || ld.type == "Reshape" ||
            ld.type == "ReLU6" || ld.type == "Reorg" || ld.type == "ShuffleChannel" || ld.type == "Resize" ||
            (ld.type == "ReLU" && !ld.params.get<float>("negative_slope", 0.f)) || /* ReLU with negative slope 0 */
            (ld.type == "Reduce" && (toLowerCase(ld.params.get<String>("reduce")) == "max" ||
                                     toLowerCase(ld.params.get<String>("reduce")) == "min")))
        {
            for (int i = 0; i < ld.outputBlobs.size(); i++)
            {
                LayerPin &pin = ld.inputBlobsId[i];
                scales[pin.lid][pin.oid] = scales[ld.id][i];
                zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][i];
            }
        }
        // Layers with multiple inputs and a single output (max pooling, element-wise max, concat):
        // all inputs must share the output's scale/zeropoint so values can be selected or copied
        // without requantization.
        else if ((ld.type == "Pooling" && toLowerCase(ld.params.get<String>("pool", "max")) == "max") /* Max Pooling */ ||
                 (ld.type == "Eltwise" && toLowerCase(ld.params.get<String>("operation", "sum")) == "max") /* Elementwise max */ ||
                 ld.type == "Concat")
        {
            for (int i = 0; i < ld.inputBlobsId.size(); i++)
            {
                LayerPin &pin = ld.inputBlobsId[i];
                scales[pin.lid][pin.oid] = scales[ld.id][0];
                zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][0];
            }
        }
    }

    // Create a new Net and add quantized layers to it.
    Net dstNet_;
    Net::Impl& dstNet = *(dstNet_.impl);
    dstNet.netWasQuantized = true;
    dstNet.setInputsNames(netInputLayer->outNames);
    dstNet.setPreferableBackend(dstNet_, prefBackend);
    dstNet.setPreferableTarget(prefTarget);
    dstNet.enableFusion(originalFusion);

    for (Impl::MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
    {
        LayerData ld = it->second;
        if (ld.id == 0)
        {
            LayerData &quantInpLd = dstNet.layers[0];
            quantInpLd.dtype = inputsDtype;
            quantInpLd.params.set("scales", DictValue::arrayReal(scales[0].data(), scales[0].size()));
            quantInpLd.params.set("zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size()));
            continue;
        }

        std::vector<LayerPin> inpPins = ld.inputBlobsId;
        // Fill input and output scales/zeropoints for the layer
        std::vector<std::vector<float> > inp_out_sc(2);
        std::vector<std::vector<int> > inp_out_zp(2);
        for (int i = 0; i < inpPins.size(); i++)
        {
            LayerPin &pin = inpPins[i];
            inp_out_sc[0].push_back(scales[pin.lid][pin.oid]);
            inp_out_zp[0].push_back(zeropoints[pin.lid][pin.oid]);
        }
        inp_out_sc[1] = scales[ld.id];
        inp_out_zp[1] = zeropoints[ld.id];

        // Set the quantization granularity: per-tensor or per-channel.
        // This mainly affects Convolution and fully-connected layers.
        ld.params.set("per_channel", perChannel);

        // Quantize layer
        Ptr<Layer> layer = ld.layerInstance;
        if (layer->tryQuantize(inp_out_sc, inp_out_zp, ld.params))
        {
            ld.type += "Int8";
            ld.dtype = CV_8S;
        }
        ld.params.set("scales", DictValue::arrayReal(inp_out_sc[1].data(), inp_out_sc[1].size()));
        ld.params.set("zeropoints", DictValue::arrayInt(inp_out_zp[1].data(), inp_out_zp[1].size()));

        // Check and add a quantize/dequantize node before the layer
        for (int i = 0; i < inpPins.size(); i++)
        {
            LayerPin &pin = inpPins[i];
            LayerData &inpLd = dstNet.getLayerData(getLayerName(pin.lid));
            pin.lid = inpLd.id;
            if (inpLd.dtype != ld.dtype)
            {
                String layerName = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? cv::format("quantize/%s/%d", inpLd.name.c_str(), pin.oid)
                                                                                : cv::format("dequantize/%s/%d", inpLd.name.c_str(), pin.oid);
                // Check if a quantize/dequantize node for the input layer already exists
                if (dstNet.getLayerId(layerName) >= 0)
                {
                    pin.lid = dstNet.getLayerId(layerName);
                    pin.oid = 0;
                }
                else
                {
                    LayerParams lp;
                    lp.set("scales", inp_out_sc[0][i]);
                    lp.set("zeropoints", inp_out_zp[0][i]);
                    lp.name = layerName;
                    lp.type = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? "Quantize" : "Dequantize";
                    int newLid = dstNet.addLayer(lp.name, lp.type, ld.dtype, lp);
                    dstNet.connect(pin.lid, pin.oid, newLid, 0);
                    pin.lid = newLid; pin.oid = 0;
                }
            }
        }

        // Add the quantized layer to the Net and connect it to its inputs.
        int newLid = dstNet.addLayer(ld.name, ld.type, ld.dtype, ld.params);
        for (int i = 0; i < inpPins.size(); i++)
            dstNet.connect(inpPins[i].lid, inpPins[i].oid, newLid, i);

        // If the layer is an output layer, add a quantize/dequantize node after it based on the output's data type.
        if (ld.requiredOutputs.size() == 0 && ld.dtype != outputsDtype)
        {
            LayerParams lp;
            lp.set("scales", inp_out_sc[1][0]);
            lp.set("zeropoints", inp_out_zp[1][0]);
            lp.name = ((ld.dtype == CV_32F && outputsDtype == CV_8S) ? "quantize/" : "dequantize/") + ld.name;
            lp.type = (ld.dtype == CV_32F && outputsDtype == CV_8S) ? "Quantize" : "Dequantize";
            dstNet.addLayerToPrev(lp.name, lp.type, outputsDtype, lp);
        }
    }
    // Restore the FP32 Net's backend, target and fusion
    setPreferableBackend(net, prefBackend);
    setPreferableTarget(prefTarget);
    enableFusion(originalFusion);
    return dstNet_;
}
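
/* Example usage of the public wrapper Net::quantize() (a minimal sketch; the
 * model and image paths are placeholders):
 *
 *     Net net = readNet("model.onnx");                 // FP32 source net
 *     Mat img = imread("calib_sample.jpg");
 *     Mat calib = blobFromImage(img, 1.0/255.0);       // calibration blob
 *     Net qnet = net.quantize(calib, CV_32F, CV_32F);  // INT8 inside, FP32 I/O
 *     qnet.setInput(calib);
 *     Mat out = qnet.forward();
 */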

// FIXIT drop from inference API
void Net::Impl::getInputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/
{
    if (!netWasQuantized)
        CV_Error(Error::StsBadFunc, "Net isn't quantized");

    LayerParams &lp = layers[0].params;
    DictValue sc = lp.get("scales");
    DictValue zp = lp.get("zeropoints");

    for (int i = 0; i < sc.size(); i++)
    {
        scales.push_back(sc.get<float>(i));
        zeropoints.push_back(zp.get<int>(i));
    }
}

// FIXIT drop from inference API
void Net::Impl::getOutputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/
{
    if (!netWasQuantized)
        CV_Error(Error::StsBadFunc, "Net isn't quantized");

    std::vector<int> outLayerIds = getUnconnectedOutLayers();
    for (auto &lid : outLayerIds)
    {
        LayerParams &lp = layers[lid].params;
        DictValue sc = lp.get("scales");
        DictValue zp = lp.get("zeropoints");

        for (int i = 0; i < sc.size(); i++)
        {
            scales.push_back(sc.get<float>(i));
            zeropoints.push_back(zp.get<int>(i));
        }
    }
}
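
/* Example (a minimal sketch, with qnet from the example above): reading back the
 * recorded parameters through the public wrappers; note that both calls append
 * to the given vectors:
 *
 *     std::vector<float> inpSc, outSc;
 *     std::vector<int> inpZp, outZp;
 *     qnet.getInputDetails(inpSc, inpZp);   // scale/zeropoint per network input
 *     qnet.getOutputDetails(outSc, outZp);  // scale/zeropoint per network output
 */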


CV__DNN_INLINE_NS_END
}} // namespace cv::dnn