// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"

#include "net_impl.hpp"

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN


// FIXIT drop from inference API
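// Affine quantization: a real value r is approximated as r ~= sc * (q - zp),
// with q constrained to [qmin, qmax] = [-128, 127]. Requiring that rmin maps to
// qmin gives sc = (rmax - rmin) / (qmax - qmin) and zp = qmin - rmin / sc.
// Worked example: rmin = -2, rmax = 6 => sc = 8/255 ~= 0.0314,
// zp = -128 - (-2)/0.0314 ~= -64.
// Conceptually, a Quantize node then computes q = round(r/sc) + zp (saturated to
// [-128, 127]) and a Dequantize node recovers r' = sc * (q - zp).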
static
void getQuantizationParams(const Mat& src, std::vector<float>& scales, std::vector<int>& zeropoints)
{
    const int qmin = -128; // INT8_MIN
    const int qmax = 127;  // INT8_MAX

    double rmin, rmax, sc, zp;
    cv::minMaxIdx(src, &rmin, &rmax);

    // 0 must be present in the range [rmin, rmax]
    rmin = std::min(rmin, 0.0);
    rmax = std::max(rmax, 0.0);

    sc = (rmax == rmin) ? 1.0 : (rmax - rmin)/(qmax - qmin);
    zp = qmin - (rmin/sc);

    scales.push_back((float)sc);
    zeropoints.push_back((int)std::round(zp));
}

// FIXIT drop from inference API
Net Net::Impl::quantize(Net& net, InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel)
{
    // Net can be quantized only once.
    if (netWasQuantized)
        CV_Error(Error::StsBadArg, "Cannot quantize a quantized net");

    CV_CheckType(inputsDtype, inputsDtype == CV_32F || inputsDtype == CV_8S, "Input depth should be CV_32F or CV_8S");
    CV_CheckType(outputsDtype, outputsDtype == CV_32F || outputsDtype == CV_8S, "Output depth should be CV_32F or CV_8S");

    bool originalFusion = fusion;
    int prefBackend = preferableBackend;
    int prefTarget = preferableTarget;

    // Disable fusions and use the CPU backend to quantize the net
    // FIXIT: we should not modify the original network!
    setPreferableBackend(net, DNN_BACKEND_OPENCV);
    setPreferableTarget(DNN_TARGET_CPU);
    enableFusion(false);
    enableWinograd(false);

    if (calibData.isMat())
    {
        setInput(calibData.getMat(), /*name=*/"", /*scalefactor=*/1.0, /*mean=*/Scalar());
    }
    else if (calibData.isMatVector())
    {
        std::vector<Mat> calibDataVec;
        calibData.getMatVector(calibDataVec);

        std::vector<String> inpNames = netInputLayer->outNames;
        CV_CheckEQ(calibDataVec.size(), inpNames.size(), "Calibration data size should be equal to number of inputs");
        for (int i = 0; i < calibDataVec.size(); i++)
            setInput(calibDataVec[i], inpNames[i], /*scalefactor=*/1.0, /*mean=*/Scalar());
    }

    std::vector<String> outNames = getUnconnectedOutLayersNames();
    std::vector<LayerPin> pins;
    for (int i = 0; i < outNames.size(); i++)
        pins.push_back(getPinByAlias(outNames[i]));
    setUpNet(pins);
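    // setUpNet() allocates the layers' blobs and finalizes the connections for
    // the requested outputs, so each layer below can be forwarded on the
    // calibration data.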

    // Compute scales and zeropoints for all the layers
    std::vector<std::vector<float> > scales;
    std::vector<std::vector<int> > zeropoints;
    for (Impl::MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
    {
        LayerData& ld = it->second;
        if (!ld.skip)
        {
            Ptr<Layer> layer = ld.layerInstance;
            std::vector<Mat> inps(ld.inputBlobs.size());
            for (int i = 0; i < ld.inputBlobs.size(); ++i)
                inps[i] = *ld.inputBlobs[i];
            layer->forward(inps, ld.outputBlobs, ld.internals);
        }

        std::vector<float> sc;
        std::vector<int> zp;
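        // Activations with a fixed, known output range get hard-coded parameters
        // instead of calibrated ones:
        //   TanH outputs lie in [-1, 1]                -> sc = 1/128,  zp = 0
        //   Sigmoid/Softmax outputs lie in [0, 1]      -> sc = 1/256,  zp = -128
        //   LogSoftmax outputs lie in roughly [-16, 0] -> sc = 16/256, zp = 127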
        if (ld.type == "TanH")
        {
            sc.push_back(1.f/128);
            zp.push_back(0);
        }
        else if (ld.type == "Sigmoid" || ld.type == "Softmax" || ld.type == "SoftMax")
        {
            if (ld.params.get<bool>("log_softmax", false))
            {
                sc.push_back(16.f/256);
                zp.push_back(127);
            }
            else
            {
                sc.push_back(1.f/256);
                zp.push_back(-128);
            }
        }
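        // Split/Slice/Crop only route data without changing values, so every
        // output inherits the quantization parameters observed on the input blob.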
        else if (ld.type == "Split" || ld.type == "Slice" || ld.type == "Crop")
        {
            std::vector<float> inp_sc; std::vector<int> inp_zp;
            getQuantizationParams(*ld.inputBlobs[0], inp_sc, inp_zp);
            sc.assign(ld.outputBlobs.size(), inp_sc[0]);
            zp.assign(ld.outputBlobs.size(), inp_zp[0]);
        }
        else
        {
            for (int i = 0; i < ld.outputBlobs.size(); i++)
                getQuantizationParams(ld.outputBlobs[i], sc, zp);
        }
        scales.push_back(sc);
        zeropoints.push_back(zp);
    }

    // For some layers, the input and output scales/zeropoints must be equal so that rescaling of inputs
    // is not needed during quantized inference. We start from the last layer and propagate each layer's
    // output scales/zeropoints back to its inputs.
    // TODO: Need a different approach. The current solution fails when two such layers share the same input layer.
    for (Impl::MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
    {
        LayerData& ld = it->second;
        // Layers with multiple outputs, where the number of outputs equals the number of inputs:
        // input i must keep the same scale/zeropoint as output i.
        if (ld.type == "Blank" || ld.type == "Dropout" || ld.type == "Identity" || ld.type == "Silence" ||
            ld.type == "Flatten" || ld.type == "Padding" || ld.type == "Permute" || ld.type == "Reshape" ||
            ld.type == "ReLU6" || ld.type == "Reorg" || ld.type == "ShuffleChannel" || ld.type == "Resize" ||
            (ld.type == "ReLU" && !ld.params.get<float>("negative_slope", 0.f)) || /* ReLU with negative slope 0 */
            (ld.type == "Reduce" && (toLowerCase(ld.params.get<String>("reduce")) == "max" ||
                                     toLowerCase(ld.params.get<String>("reduce")) == "min")))
        {
            for (int i = 0; i < ld.outputBlobs.size(); i++)
            {
                LayerPin &pin = ld.inputBlobsId[i];
                scales[pin.lid][pin.oid] = scales[ld.id][i];
                zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][i];
            }
        }
        // Layers with multiple inputs and a single output (max pooling, element-wise max, concat):
        // all inputs must share the output's scale/zeropoint so values can be selected or copied
        // without requantization.
        else if ((ld.type == "Pooling" && toLowerCase(ld.params.get<String>("pool", "max")) == "max") /* Max Pooling */ ||
                 (ld.type == "Eltwise" && toLowerCase(ld.params.get<String>("operation", "sum")) == "max") /* Elementwise max */ ||
                 ld.type == "Concat")
        {
            for (int i = 0; i < ld.inputBlobsId.size(); i++)
            {
                LayerPin &pin = ld.inputBlobsId[i];
                scales[pin.lid][pin.oid] = scales[ld.id][0];
                zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][0];
            }
        }
    }

    // Create a new Net and add quantized layers to it.
    Net dstNet_;
    Net::Impl& dstNet = *(dstNet_.impl);
    dstNet.netWasQuantized = true;
    dstNet.setInputsNames(netInputLayer->outNames);
    dstNet.setPreferableBackend(dstNet_, prefBackend);
    dstNet.setPreferableTarget(prefTarget);
    dstNet.enableFusion(originalFusion);

    for (Impl::MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
    {
        LayerData ld = it->second;
        if (ld.id == 0)
        {
            LayerData &quantInpLd = dstNet.layers[0];
            quantInpLd.dtype = inputsDtype;
            quantInpLd.params.set("scales", DictValue::arrayReal(scales[0].data(), scales[0].size()));
            quantInpLd.params.set("zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size()));
            continue;
        }

        std::vector<LayerPin> inpPins = ld.inputBlobsId;
        // Fill input and output scales/zeropoints for the layer
        std::vector<std::vector<float> > inp_out_sc(2);
        std::vector<std::vector<int> > inp_out_zp(2);
        for (int i = 0; i < inpPins.size(); i++)
        {
            LayerPin &pin = inpPins[i];
            inp_out_sc[0].push_back(scales[pin.lid][pin.oid]);
            inp_out_zp[0].push_back(zeropoints[pin.lid][pin.oid]);
        }
        inp_out_sc[1] = scales[ld.id];
        inp_out_zp[1] = zeropoints[ld.id];

        // Set the quantization granularity: per-tensor or per-channel.
        // This mainly affects Convolution and fully-connected layers.
        ld.params.set("per_channel", perChannel);

        // Quantize layer
        Ptr<Layer> layer = ld.layerInstance;
        if (layer->tryQuantize(inp_out_sc, inp_out_zp, ld.params))
        {
            ld.type += "Int8";
            ld.dtype = CV_8S;
        }
        ld.params.set("scales", DictValue::arrayReal(inp_out_sc[1].data(), inp_out_sc[1].size()));
        ld.params.set("zeropoints", DictValue::arrayInt(inp_out_zp[1].data(), inp_out_zp[1].size()));

        // Check and add a quantize/dequantize node before the layer
        for (int i = 0; i < inpPins.size(); i++)
        {
            LayerPin &pin = inpPins[i];
            LayerData &inpLd = dstNet.getLayerData(getLayerName(pin.lid));
            pin.lid = inpLd.id;
            if (inpLd.dtype != ld.dtype)
            {
                String layerName = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? cv::format("quantize/%s/%d", inpLd.name.c_str(), pin.oid)
                                                                                : cv::format("dequantize/%s/%d", inpLd.name.c_str(), pin.oid);
                // Check if a quantize/dequantize node for the input layer already exists
                if (dstNet.getLayerId(layerName) >= 0)
                {
                    pin.lid = dstNet.getLayerId(layerName);
                    pin.oid = 0;
                }
                else
                {
                    LayerParams lp;
                    lp.set("scales", inp_out_sc[0][i]);
                    lp.set("zeropoints", inp_out_zp[0][i]);
                    lp.name = layerName;
                    lp.type = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? "Quantize" : "Dequantize";
                    int newLid = dstNet.addLayer(lp.name, lp.type, ld.dtype, lp);
                    dstNet.connect(pin.lid, pin.oid, newLid, 0);
                    pin.lid = newLid; pin.oid = 0;
                }
            }
        }

        // Add the quantized layer to the Net and connect it to its inputs.
        int newLid = dstNet.addLayer(ld.name, ld.type, ld.dtype, ld.params);
        for (int i = 0; i < inpPins.size(); i++)
            dstNet.connect(inpPins[i].lid, inpPins[i].oid, newLid, i);

        // If the layer is an output layer, add a quantize/dequantize node after it based on the output's data type.
        if (ld.requiredOutputs.size() == 0 && ld.dtype != outputsDtype)
        {
            LayerParams lp;
            lp.set("scales", inp_out_sc[1][0]);
            lp.set("zeropoints", inp_out_zp[1][0]);
            lp.name = ((ld.dtype == CV_32F && outputsDtype == CV_8S) ? "quantize/" : "dequantize/") + ld.name;
            lp.type = (ld.dtype == CV_32F && outputsDtype == CV_8S) ? "Quantize" : "Dequantize";
            dstNet.addLayerToPrev(lp.name, lp.type, outputsDtype, lp);
        }
    }
    // Restore the FP32 Net's backend, target and fusion
    setPreferableBackend(net, prefBackend);
    setPreferableTarget(prefTarget);
    enableFusion(originalFusion);
    return dstNet_;
}
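
/* Example usage of the public wrapper Net::quantize() (a minimal sketch; the
 * model and image paths are placeholders):
 *
 *     Net net = readNet("model.onnx");                 // FP32 source net
 *     Mat img = imread("calib_sample.jpg");
 *     Mat calib = blobFromImage(img, 1.0/255.0);       // calibration blob
 *     Net qnet = net.quantize(calib, CV_32F, CV_32F);  // INT8 inside, FP32 I/O
 *     qnet.setInput(calib);
 *     Mat out = qnet.forward();
 */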

// FIXIT drop from inference API
void Net::Impl::getInputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/
{
    if (!netWasQuantized)
        CV_Error(Error::StsBadFunc, "Net isn't quantized");

    LayerParams &lp = layers[0].params;
    DictValue sc = lp.get("scales");
    DictValue zp = lp.get("zeropoints");

    for (int i = 0; i < sc.size(); i++)
    {
        scales.push_back(sc.get<float>(i));
        zeropoints.push_back(zp.get<int>(i));
    }
}

// FIXIT drop from inference API
void Net::Impl::getOutputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/
{
    if (!netWasQuantized)
        CV_Error(Error::StsBadFunc, "Net isn't quantized");

    std::vector<int> outLayerIds = getUnconnectedOutLayers();
    for (auto &lid : outLayerIds)
    {
        LayerParams &lp = layers[lid].params;
        DictValue sc = lp.get("scales");
        DictValue zp = lp.get("zeropoints");

        for (int i = 0; i < sc.size(); i++)
        {
            scales.push_back(sc.get<float>(i));
            zeropoints.push_back(zp.get<int>(i));
        }
    }
}
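
/* Example (a minimal sketch, with qnet from the example above): reading back the
 * recorded parameters through the public wrappers; note that both calls append
 * to the given vectors:
 *
 *     std::vector<float> inpSc, outSc;
 *     std::vector<int> inpZp, outZp;
 *     qnet.getInputDetails(inpSc, inpZp);   // scale/zeropoint per network input
 *     qnet.getOutputDetails(outSc, outZp);  // scale/zeropoint per network output
 */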


CV__DNN_INLINE_NS_END
}} // namespace cv::dnn