// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"

#include "net_impl.hpp"

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN


// FIXIT drop from inference API
static
void getQuantizationParams(const Mat& src, std::vector<float>& scales, std::vector<int>& zeropoints)
{
    const int qmin = -128;  // INT8_MIN
    const int qmax = 127;   // INT8_MAX

    double rmin, rmax, sc, zp;
    cv::minMaxIdx(src, &rmin, &rmax);

    // 0 must be present in the range [rmin, rmax]
    rmin = std::min(rmin, 0.0);
    rmax = std::max(rmax, 0.0);

    sc = (rmax == rmin) ? 1.0 : (rmax - rmin)/(qmax - qmin);
    zp = qmin - (rmin/sc);

    scales.push_back((float)sc);
    zeropoints.push_back((int)std::round(zp));
}
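
// A worked example of the affine mapping above (illustrative numbers, not
// taken from any calibration data): for a blob with rmin = -1 and rmax = 3,
//   sc = (3 - (-1)) / (127 - (-128)) = 4/255 ~= 0.0157
//   zp = round(-128 - (-1)/0.0157)   ~= -64
// so q = saturate_cast<int8_t>(round(x/sc) + zp) sends -1 -> -128, 0 -> -64
// and 3 -> 127, i.e. the observed range fills the int8 range and 0 stays
// exactly representable.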

// FIXIT drop from inference API
Net Net::Impl::quantize(Net& net, InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel)
{
    // Net can be quantized only once.
    if (netWasQuantized)
        CV_Error(Error::StsBadArg, "Cannot quantize a quantized net");

    CV_CheckType(inputsDtype, inputsDtype == CV_32F || inputsDtype == CV_8S, "Input depth should be CV_32F or CV_8S");
    CV_CheckType(outputsDtype, outputsDtype == CV_32F || outputsDtype == CV_8S, "Output depth should be CV_32F or CV_8S");

    bool originalFusion = fusion;
    int prefBackend = preferableBackend;
    int prefTarget = preferableTarget;

    // Disable fusions and use the CPU backend to quantize the net
    // FIXIT: we should not modify the original network!
    setPreferableBackend(net, DNN_BACKEND_OPENCV);
    setPreferableTarget(DNN_TARGET_CPU);
    enableFusion(false);
    enableWinograd(false);

    if (calibData.isMat())
    {
        setInput(calibData.getMat(), /*name=*/"", /*scalefactor=*/1.0, /*mean=*/Scalar());
    }
    else if (calibData.isMatVector())
    {
        std::vector<Mat> calibDataVec;
        calibData.getMatVector(calibDataVec);

        std::vector<String> inpNames = netInputLayer->outNames;
        CV_CheckEQ(calibDataVec.size(), inpNames.size(), "Calibration data size should be equal to number of inputs");
        for (int i = 0; i < calibDataVec.size(); i++)
            setInput(calibDataVec[i], inpNames[i], /*scalefactor=*/1.0, /*mean=*/Scalar());
    }

    std::vector<String> outNames = getUnconnectedOutLayersNames();
    std::vector<LayerPin> pins;
    for (int i = 0; i < outNames.size(); i++)
        pins.push_back(getPinByAlias(outNames[i]));
    setUpNet(pins);

    // Compute scales and zeropoints for all the layers
    std::vector<std::vector<float> > scales;
    std::vector<std::vector<int> > zeropoints;
    for (Impl::MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
    {
        LayerData& ld = it->second;
        if (!ld.skip)
        {
            Ptr<Layer> layer = ld.layerInstance;
            std::vector<Mat> inps(ld.inputBlobs.size());
            for (int i = 0; i < ld.inputBlobs.size(); ++i)
                inps[i] = *ld.inputBlobs[i];
            layer->forward(inps, ld.outputBlobs, ld.internals);
        }

        std::vector<float> sc;
        std::vector<int> zp;
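        // Activations with a known, fixed output range get hard-coded
        // quantization parameters instead of data-driven ones:
        //   TanH            -> [-1, 1]:    sc = 2/256 = 1/128, zp = 0
        //   Sigmoid/Softmax -> [0, 1]:     sc = 1/256,         zp = -128
        //   log-softmax     -> (-inf, 0]:  sc = 16/256 with zp = 127 covers roughly [-16, 0]
        // Split/Slice/Crop just forward their input, so they inherit the
        // input's parameters; everything else is calibrated from observed
        // min/max values via getQuantizationParams().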
        if (ld.type == "TanH")
        {
            sc.push_back(1.f/128);
            zp.push_back(0);
        }
        else if (ld.type == "Sigmoid" || ld.type == "Softmax" || ld.type == "SoftMax")
        {
            if (ld.params.get<bool>("log_softmax", false))
            {
                sc.push_back(16.f/256);
                zp.push_back(127);
            }
            else
            {
                sc.push_back(1.f/256);
                zp.push_back(-128);
            }
        }
        else if (ld.type == "Split" || ld.type == "Slice" || ld.type == "Crop")
        {
            std::vector<float> inp_sc; std::vector<int> inp_zp;
            getQuantizationParams(*ld.inputBlobs[0], inp_sc, inp_zp);
            sc.assign(ld.outputBlobs.size(), inp_sc[0]);
            zp.assign(ld.outputBlobs.size(), inp_zp[0]);
        }
        else
        {
            for (int i = 0; i < ld.outputBlobs.size(); i++)
                getQuantizationParams(ld.outputBlobs[i], sc, zp);
        }
        scales.push_back(sc);
        zeropoints.push_back(zp);
    }

    // For some layers, the input and output scales/zeropoints must be equal so that rescaling of inputs
    // is not needed during quantized inference. We start from the last layer and modify each layer's input scales/zeropoints.
    // TODO: needs a different approach; the current solution fails when two such layers share the same input layer.
    for (Impl::MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
    {
        LayerData& ld = it->second;
        // Layers with multiple outputs. The number of outputs is equal to the number of inputs.
        if (ld.type == "Blank" || ld.type == "Dropout" || ld.type == "Identity" || ld.type == "Silence" ||
            ld.type == "Flatten" || ld.type == "Padding" || ld.type == "Permute" || ld.type == "Reshape" ||
            ld.type == "ReLU6" || ld.type == "Reorg" || ld.type == "ShuffleChannel" || ld.type == "Resize" ||
            (ld.type == "ReLU" && !ld.params.get<float>("negative_slope", 0.f)) || /* ReLU with negative slope 0 */
            (ld.type == "Reduce" && (toLowerCase(ld.params.get<String>("reduce")) == "max" ||
                                     toLowerCase(ld.params.get<String>("reduce")) == "min")))
        {
            for (int i = 0; i < ld.outputBlobs.size(); i++)
            {
                LayerPin &pin = ld.inputBlobsId[i];
                scales[pin.lid][pin.oid] = scales[ld.id][i];
                zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][i];
            }
        }
        // Layers with multiple inputs and a single output.
        else if ((ld.type == "Pooling" && toLowerCase(ld.params.get<String>("pool", "max")) == "max") /* Max Pooling */ ||
                 (ld.type == "Eltwise" && toLowerCase(ld.params.get<String>("operation", "sum")) == "max") /* Elementwise max */ ||
                 ld.type == "Concat")
        {
            for (int i = 0; i < ld.inputBlobsId.size(); i++)
            {
                LayerPin &pin = ld.inputBlobsId[i];
                scales[pin.lid][pin.oid] = scales[ld.id][0];
                zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][0];
            }
        }
    }
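
    // Why the propagation above is needed: the int8 kernels for max-pooling,
    // elementwise max and concat copy or compare raw quantized values. Since
    // the real value is x = sc*(q - zp), comparing or concatenating raw q is
    // only valid when every input and the output share one (sc, zp) pair;
    // otherwise each input would have to be rescaled first.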

    // Create a new Net and add quantized layers to it.
    Net dstNet_;
    Net::Impl& dstNet = *(dstNet_.impl);
    dstNet.netWasQuantized = true;
    dstNet.setInputsNames(netInputLayer->outNames);
    dstNet.setPreferableBackend(dstNet_, prefBackend);
    dstNet.setPreferableTarget(prefTarget);
    dstNet.enableFusion(originalFusion);

    for (Impl::MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
    {
        LayerData ld = it->second;
        if (ld.id == 0)
        {
            LayerData &quantInpLd = dstNet.layers[0];
            quantInpLd.dtype = inputsDtype;
            quantInpLd.params.set("scales", DictValue::arrayReal(scales[0].data(), scales[0].size()));
            quantInpLd.params.set("zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size()));
            continue;
        }

        std::vector<LayerPin> inpPins = ld.inputBlobsId;
        // Fill input and output scales/zeropoints for the layer
        std::vector<std::vector<float> > inp_out_sc(2);
        std::vector<std::vector<int> > inp_out_zp(2);
        for (int i = 0; i < inpPins.size(); i++)
        {
            LayerPin &pin = inpPins[i];
            inp_out_sc[0].push_back(scales[pin.lid][pin.oid]);
            inp_out_zp[0].push_back(zeropoints[pin.lid][pin.oid]);
        }
        inp_out_sc[1] = scales[ld.id];
        inp_out_zp[1] = zeropoints[ld.id];

        // Set the quantization granularity, per-tensor or per-channel.
        // This matters mainly for convolution and fully connected layers.
        ld.params.set("per_channel", perChannel);

        // Quantize layer
        Ptr<Layer> layer = ld.layerInstance;
        if (layer->tryQuantize(inp_out_sc, inp_out_zp, ld.params))
        {
            ld.type += "Int8";
            ld.dtype = CV_8S;
        }
        ld.params.set("scales", DictValue::arrayReal(inp_out_sc[1].data(), inp_out_sc[1].size()));
        ld.params.set("zeropoints", DictValue::arrayInt(inp_out_zp[1].data(), inp_out_zp[1].size()));

        // Check and add a quantize/dequantize node before the layer
        for (int i = 0; i < inpPins.size(); i++)
        {
            LayerPin &pin = inpPins[i];
            LayerData &inpLd = dstNet.getLayerData(getLayerName(pin.lid));
            pin.lid = inpLd.id;
            if (inpLd.dtype != ld.dtype)
            {
                String layerName = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? cv::format("quantize/%s/%d", inpLd.name.c_str(), pin.oid)
                                                                                : cv::format("dequantize/%s/%d", inpLd.name.c_str(), pin.oid);
                // Check if a quantize/dequantize node for the input layer already exists
                if (dstNet.getLayerId(layerName) >= 0)
                {
                    pin.lid = dstNet.getLayerId(layerName);
                    pin.oid = 0;
                }
                else
                {
                    LayerParams lp;
                    lp.set("scales", inp_out_sc[0][i]);
                    lp.set("zeropoints", inp_out_zp[0][i]);
                    lp.name = layerName;
                    lp.type = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? "Quantize" : "Dequantize";
                    int newLid = dstNet.addLayer(lp.name, lp.type, ld.dtype, lp);
                    dstNet.connect(pin.lid, pin.oid, newLid, 0);
                    pin.lid = newLid; pin.oid = 0;
                }
            }
        }

        // Add the quantized layer to the Net and connect it to its inputs.
        int newLid = dstNet.addLayer(ld.name, ld.type, ld.dtype, ld.params);
        for (int i = 0; i < inpPins.size(); i++)
            dstNet.connect(inpPins[i].lid, inpPins[i].oid, newLid, i);

        // If the layer is an output layer, add a quantize/dequantize node after it based on the output's data type.
        if (ld.requiredOutputs.size() == 0 && ld.dtype != outputsDtype)
        {
            LayerParams lp;
            lp.set("scales", inp_out_sc[1][0]);
            lp.set("zeropoints", inp_out_zp[1][0]);
            lp.name = ((ld.dtype == CV_32F && outputsDtype == CV_8S) ? "quantize/" : "dequantize/") + ld.name;
            lp.type = (ld.dtype == CV_32F && outputsDtype == CV_8S) ? "Quantize" : "Dequantize";
            dstNet.addLayerToPrev(lp.name, lp.type, outputsDtype, lp);
        }
    }
    // Restore the FP32 net's backend, target and fusion settings
    setPreferableBackend(net, prefBackend);
    setPreferableTarget(prefTarget);
    enableFusion(originalFusion);
    return dstNet_;
}
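
// A minimal usage sketch of the public API that ends up here (illustrative
// only; the model path and preprocessing below are assumptions, not part of
// this file):
//
//   Net net = readNet("model.onnx");                 // hypothetical FP32 model
//   Mat calib = blobFromImage(img, 1.0/255);         // representative input(s)
//   Net qnet = net.quantize(calib, CV_32F, CV_32F);  // keep float inputs/outputs
//   qnet.setInput(calib);
//   Mat out = qnet.forward();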

// FIXIT drop from inference API
void Net::Impl::getInputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/
{
    if (!netWasQuantized)
        CV_Error(Error::StsBadFunc, "Net isn't quantized");

    LayerParams &lp = layers[0].params;
    DictValue sc = lp.get("scales");
    DictValue zp = lp.get("zeropoints");

    for (int i = 0; i < sc.size(); i++)
    {
        scales.push_back(sc.get<float>(i));
        zeropoints.push_back(zp.get<int>(i));
    }
}

// FIXIT drop from inference API
void Net::Impl::getOutputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/
{
    if (!netWasQuantized)
        CV_Error(Error::StsBadFunc, "Net isn't quantized");

    std::vector<int> outLayerIds = getUnconnectedOutLayers();
    for (auto &lid : outLayerIds)
    {
        LayerParams &lp = layers[lid].params;
        DictValue sc = lp.get("scales");
        DictValue zp = lp.get("zeropoints");

        for (int i = 0; i < sc.size(); i++)
        {
            scales.push_back(sc.get<float>(i));
            zeropoints.push_back(zp.get<int>(i));
        }
    }
}
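
// Illustrative use of these accessors (an assumption, not code from this
// file): when a net was quantized with CV_8S inputs and outputs, the caller
// converts blobs manually with q = x/scale + zeropoint and
// x = scale*(q - zeropoint), e.g. via Mat::convertTo (dst = alpha*src + beta):
//
//   std::vector<float> sc; std::vector<int> zp;
//   qnet.getInputDetails(sc, zp);
//   Mat q8;
//   blob.convertTo(q8, CV_8S, 1.0/sc[0], zp[0]);            // quantize the input
//   qnet.setInput(q8);
//   Mat out8 = qnet.forward(), out32;
//   qnet.getOutputDetails(sc, zp);
//   out8.convertTo(out32, CV_32F, sc[0], -sc[0]*zp[0]);     // dequantize the output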


CV__DNN_INLINE_NS_END
}}  // namespace cv::dnn