| 1 | /*M/////////////////////////////////////////////////////////////////////////////////////// |
| 2 | // |
| 3 | // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
| 4 | // |
| 5 | // By downloading, copying, installing or using the software you agree to this license. |
| 6 | // If you do not agree to this license, do not download, install, |
| 7 | // copy or use the software. |
| 8 | // |
| 9 | // |
| 10 | // License Agreement |
| 11 | // For Open Source Computer Vision Library |
| 12 | // |
| 13 | // Copyright (C) 2013, OpenCV Foundation, all rights reserved. |
| 14 | // Third party copyrights are property of their respective owners. |
| 15 | // |
| 16 | // Redistribution and use in source and binary forms, with or without modification, |
| 17 | // are permitted provided that the following conditions are met: |
| 18 | // |
| 19 | // * Redistribution's of source code must retain the above copyright notice, |
| 20 | // this list of conditions and the following disclaimer. |
| 21 | // |
| 22 | // * Redistribution's in binary form must reproduce the above copyright notice, |
| 23 | // this list of conditions and the following disclaimer in the documentation |
| 24 | // and/or other materials provided with the distribution. |
| 25 | // |
| 26 | // * The name of the copyright holders may not be used to endorse or promote products |
| 27 | // derived from this software without specific prior written permission. |
| 28 | // |
| 29 | // This software is provided by the copyright holders and contributors "as is" and |
| 30 | // any express or implied warranties, including, but not limited to, the implied |
| 31 | // warranties of merchantability and fitness for a particular purpose are disclaimed. |
| 32 | // In no event shall the Intel Corporation or contributors be liable for any direct, |
| 33 | // indirect, incidental, special, exemplary, or consequential damages |
| 34 | // (including, but not limited to, procurement of substitute goods or services; |
| 35 | // loss of use, data, or profits; or business interruption) however caused |
| 36 | // and on any theory of liability, whether in contract, strict liability, |
| 37 | // or tort (including negligence or otherwise) arising in any way out of |
| 38 | // the use of this software, even if advised of the possibility of such damage. |
| 39 | // |
| 40 | //M*/ |
| 41 | |
| 42 | #ifndef OPENCV_DNN_DNN_HPP |
| 43 | #define OPENCV_DNN_DNN_HPP |
| 44 | |
| 45 | #include <vector> |
| 46 | #include <opencv2/core.hpp> |
| 47 | #include "opencv2/core/async.hpp" |
| 48 | |
| 49 | #include "../dnn/version.hpp" |
| 50 | |
| 51 | #include <opencv2/dnn/dict.hpp> |
| 52 | |
| 53 | namespace cv { |
| 54 | namespace dnn { |
| 55 | |
| 56 | namespace accessor { |
| 57 | class DnnNetAccessor; // forward declaration |
| 58 | } |
| 59 | |
| 60 | CV__DNN_INLINE_NS_BEGIN |
| 61 | //! @addtogroup dnn |
| 62 | //! @{ |
| 63 | |
//! Shape of a blob: a vector of dimension sizes (dimension meaning depends on the data layout, see #DataLayout).
typedef std::vector<int> MatShape;
| 65 | |
| 66 | /** |
| 67 | * @brief Enum of computation backends supported by layers. |
| 68 | * @see Net::setPreferableBackend |
| 69 | */ |
enum Backend
{
    //! DNN_BACKEND_DEFAULT equals to OPENCV_DNN_BACKEND_DEFAULT, which can be defined using CMake or a configuration parameter
    DNN_BACKEND_DEFAULT = 0,
    DNN_BACKEND_HALIDE,           //!< Halide backend (see Layer::initHalide).
    DNN_BACKEND_INFERENCE_ENGINE, //!< Intel OpenVINO computational backend, supported targets: CPU, OPENCL, OPENCL_FP16, MYRIAD, HDDL, NPU
                                  //!< @note Tutorial how to build OpenCV with OpenVINO: @ref tutorial_dnn_openvino
    DNN_BACKEND_OPENCV,           //!< OpenCV's built-in implementation.
    DNN_BACKEND_VKCOM,            //!< Vulkan compute backend (see Layer::initVkCom).
    DNN_BACKEND_CUDA,             //!< NVIDIA CUDA backend (see Layer::initCUDA).
    DNN_BACKEND_WEBNN,            //!< WebNN backend (see Layer::initWebnn).
    DNN_BACKEND_TIMVX,            //!< TIM-VX backend (see Layer::initTimVX).
    DNN_BACKEND_CANN,             //!< Huawei CANN backend (see Layer::initCann).
#if defined(__OPENCV_BUILD) || defined(BUILD_PLUGIN)
#if !defined(OPENCV_BINDING_PARSER)
    DNN_BACKEND_INFERENCE_ENGINE_NGRAPH = 1000000,     // internal - use DNN_BACKEND_INFERENCE_ENGINE + setInferenceEngineBackendType()
    DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019,      // internal - use DNN_BACKEND_INFERENCE_ENGINE + setInferenceEngineBackendType()
#endif
#endif
};
| 90 | |
| 91 | /** |
| 92 | * @brief Enum of target devices for computations. |
| 93 | * @see Net::setPreferableTarget |
| 94 | */ |
enum Target
{
    DNN_TARGET_CPU = 0,     //!< FP32 computations on CPU (the default target).
    DNN_TARGET_OPENCL,      //!< OpenCL device, FP32 precision.
    DNN_TARGET_OPENCL_FP16, //!< OpenCL device, half (FP16) precision.
    DNN_TARGET_MYRIAD,      //!< Intel Myriad VPU (listed among DNN_BACKEND_INFERENCE_ENGINE targets).
    DNN_TARGET_VULKAN,      //!< Vulkan device (used with DNN_BACKEND_VKCOM).
    DNN_TARGET_FPGA,        //!< FPGA device with CPU fallbacks using Inference Engine's Heterogeneous plugin.
    DNN_TARGET_CUDA,        //!< CUDA device, FP32 precision (used with DNN_BACKEND_CUDA).
    DNN_TARGET_CUDA_FP16,   //!< CUDA device, half (FP16) precision (used with DNN_BACKEND_CUDA).
    DNN_TARGET_HDDL,        //!< Intel HDDL accelerator (listed among DNN_BACKEND_INFERENCE_ENGINE targets).
    DNN_TARGET_NPU,         //!< NPU device (listed among DNN_BACKEND_INFERENCE_ENGINE targets).
    DNN_TARGET_CPU_FP16,    //!< Low-precision (FP16) computations on CPU to accelerate model inference. Only the ARM platform is supported.
};
| 109 | |
| 110 | /** |
| 111 | * @brief Enum of data layout for model inference. |
| 112 | * @see Image2BlobParams |
| 113 | */ |
enum DataLayout
{
    DNN_LAYOUT_UNKNOWN = 0, //!< Layout is not specified or could not be determined.
    DNN_LAYOUT_ND = 1,      //!< OpenCV data layout for 2D data.
    DNN_LAYOUT_NCHW = 2,    //!< OpenCV data layout for 4D data.
    DNN_LAYOUT_NCDHW = 3,   //!< OpenCV data layout for 5D data.
    DNN_LAYOUT_NHWC = 4,    //!< Tensorflow-like data layout for 4D data.
    DNN_LAYOUT_NDHWC = 5,   //!< Tensorflow-like data layout for 5D data.
    DNN_LAYOUT_PLANAR = 6,  //!< Tensorflow-like data layout; it should only be used when parsing TensorFlow or TFLite models.
};
| 124 | |
/** @brief Returns the list of available (backend, target) combinations. */
CV_EXPORTS std::vector< std::pair<Backend, Target> > getAvailableBackends();

/** @brief Returns the list of targets available for the given computation backend.
 * @param[in] be computation backend to query.
 */
CV_EXPORTS_W std::vector<Target> getAvailableTargets(dnn::Backend be);
| 127 | |
| 128 | /** |
| 129 | * @brief Enables detailed logging of the DNN model loading with CV DNN API. |
| 130 | * @param[in] isDiagnosticsMode Indicates whether diagnostic mode should be set. |
| 131 | * |
| 132 | * Diagnostic mode provides detailed logging of the model loading stage to explore |
| 133 | * potential problems (ex.: not implemented layer type). |
| 134 | * |
 * @note In diagnostic mode a series of assertions is skipped, which can lead to an
 * application crash.
| 137 | */ |
| 138 | CV_EXPORTS void enableModelDiagnostics(bool isDiagnosticsMode); |
| 139 | |
/** @brief This class provides all data needed to initialize layer.
 *
 * It includes dictionary with scalar params (which can be read by using Dict interface),
 * blob params #blobs and optional meta information: #name and #type of layer instance.
 */
class CV_EXPORTS LayerParams : public Dict
{
public:
    //TODO: Add ability to name blob params
    std::vector<Mat> blobs; //!< List of learned parameters stored as blobs.

    String name; //!< Name of the layer instance (optional, can be used for internal purposes).
    String type; //!< Type name which was used for creating layer by layer factory (optional).
};
| 154 | |
| 155 | /** |
| 156 | * @brief Derivatives of this class encapsulates functions of certain backends. |
| 157 | */ |
| 158 | class BackendNode |
| 159 | { |
| 160 | public: |
| 161 | explicit BackendNode(int backendId); |
| 162 | |
| 163 | virtual ~BackendNode(); //!< Virtual destructor to make polymorphism. |
| 164 | |
| 165 | int backendId; //!< Backend identifier. |
| 166 | }; |
| 167 | |
| 168 | /** |
| 169 | * @brief Derivatives of this class wraps cv::Mat for different backends and targets. |
| 170 | */ |
| 171 | class BackendWrapper |
| 172 | { |
| 173 | public: |
| 174 | BackendWrapper(int backendId, int targetId); |
| 175 | |
| 176 | /** |
| 177 | * @brief Wrap cv::Mat for specific backend and target. |
| 178 | * @param[in] targetId Target identifier. |
| 179 | * @param[in] m cv::Mat for wrapping. |
| 180 | * |
| 181 | * Make CPU->GPU data transfer if it's require for the target. |
| 182 | */ |
| 183 | BackendWrapper(int targetId, const cv::Mat& m); |
| 184 | |
| 185 | /** |
| 186 | * @brief Make wrapper for reused cv::Mat. |
| 187 | * @param[in] base Wrapper of cv::Mat that will be reused. |
| 188 | * @param[in] shape Specific shape. |
| 189 | * |
| 190 | * Initialize wrapper from another one. It'll wrap the same host CPU |
| 191 | * memory and mustn't allocate memory on device(i.e. GPU). It might |
| 192 | * has different shape. Use in case of CPU memory reusing for reuse |
| 193 | * associated memory on device too. |
| 194 | */ |
| 195 | BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape); |
| 196 | |
| 197 | virtual ~BackendWrapper(); //!< Virtual destructor to make polymorphism. |
| 198 | |
| 199 | /** |
| 200 | * @brief Transfer data to CPU host memory. |
| 201 | */ |
| 202 | virtual void copyToHost() = 0; |
| 203 | |
| 204 | /** |
| 205 | * @brief Indicate that an actual data is on CPU. |
| 206 | */ |
| 207 | virtual void setHostDirty() = 0; |
| 208 | |
| 209 | int backendId; //!< Backend identifier. |
| 210 | int targetId; //!< Target identifier. |
| 211 | }; |
| 212 | |
| 213 | class CV_EXPORTS ActivationLayer; |
| 214 | |
/** @brief This interface class allows to build new Layers - the building blocks of networks.
 *
 * Each class, derived from Layer, must implement forward() method to compute outputs.
 * Also before using the new layer into networks you must register your layer by using one of @ref dnnLayerFactory "LayerFactory" macros.
 */
class CV_EXPORTS_W Layer : public Algorithm
{
public:

    //! List of learned parameters must be stored here to allow reading them by using Net::getParam().
    CV_PROP_RW std::vector<Mat> blobs;

    /** @brief Computes and sets internal parameters according to inputs, outputs and blobs.
     *  @deprecated Use Layer::finalize(InputArrayOfArrays, OutputArrayOfArrays) instead
     *  @param[in]  input  vector of already allocated input blobs
     *  @param[out] output vector of already allocated output blobs
     *
     * This method is called after network has allocated all memory for input and output blobs
     * and before inferencing.
     */
    CV_DEPRECATED_EXTERNAL
    virtual void finalize(const std::vector<Mat*> &input, std::vector<Mat> &output);

    /** @brief Computes and sets internal parameters according to inputs, outputs and blobs.
     *  @param[in]  inputs  vector of already allocated input blobs
     *  @param[out] outputs vector of already allocated output blobs
     *
     * This method is called after network has allocated all memory for input and output blobs
     * and before inferencing.
     */
    CV_WRAP virtual void finalize(InputArrayOfArrays inputs, OutputArrayOfArrays outputs);

    /** @brief Given the @p input blobs, computes the output @p blobs.
     *  @deprecated Use Layer::forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) instead
     *  @param[in]  input the input blobs.
     *  @param[out] output allocated output blobs, which will store results of the computation.
     *  @param[out] internals allocated internal blobs
     */
    CV_DEPRECATED_EXTERNAL
    virtual void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals);

    /** @brief Given the @p input blobs, computes the output @p blobs.
     *  @param[in]  inputs the input blobs.
     *  @param[out] outputs allocated output blobs, which will store results of the computation.
     *  @param[out] internals allocated internal blobs
     */
    virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals);

    /** @brief Tries to quantize the given layer and compute the quantization parameters required for fixed point implementation.
     *  @param[in] scales input and output scales.
     *  @param[in] zeropoints input and output zeropoints.
     *  @param[out] params Quantized parameters required for fixed point implementation of that layer.
     *  @returns True if layer can be quantized.
     */
    virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
                             const std::vector<std::vector<int> > &zeropoints, LayerParams& params);

    /** @brief Given the @p input blobs, computes the output @p blobs.
     *  @param[in]  inputs the input blobs.
     *  @param[out] outputs allocated output blobs, which will store results of the computation.
     *  @param[out] internals allocated internal blobs
     */
    void forward_fallback(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals);

    /** @brief
     * @overload
     * @deprecated Use Layer::finalize(InputArrayOfArrays, OutputArrayOfArrays) instead
     */
    CV_DEPRECATED_EXTERNAL
    void finalize(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs);

    /** @brief
     * @overload
     * @deprecated Use Layer::finalize(InputArrayOfArrays, OutputArrayOfArrays) instead
     */
    CV_DEPRECATED std::vector<Mat> finalize(const std::vector<Mat> &inputs);

    /** @brief Allocates layer and computes output.
     *  @deprecated This method will be removed in the future release.
     */
    CV_DEPRECATED CV_WRAP void run(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs,
                                   CV_IN_OUT std::vector<Mat> &internals);

    /** @brief Returns index of input blob into the input array.
     *  @param inputName label of input blob
     *
     * Each layer input and output can be labeled to easily identify them using "%<layer_name%>[.output_name]" notation.
     * This method maps label of input blob to its index into input vector.
     */
    virtual int inputNameToIndex(String inputName); // FIXIT const
    /** @brief Returns index of output blob in output array.
     *  @see inputNameToIndex()
     */
    CV_WRAP virtual int outputNameToIndex(const String& outputName); // FIXIT const

    /**
     * @brief Ask layer if it supports specific backend for doing computations.
     * @param[in] backendId computation backend identifier.
     * @see Backend
     */
    virtual bool supportBackend(int backendId); // FIXIT const

    /**
     * @brief Returns Halide backend node.
     * @param[in] inputs Input Halide buffers.
     * @see BackendNode, BackendWrapper
     *
     * Input buffers should be exactly the same that will be used in forward invocations.
     * Despite we can use Halide::ImageParam based on input shape only,
     * it helps prevent some memory management issues (if something wrong,
     * Halide tests will be failed).
     */
    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs);

    /**
     * @brief Returns an OpenVINO nGraph backend node.
     * @param[in] inputs layer input wrappers
     * @param[in] nodes  backend nodes of the input layers
     */
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs, const std::vector<Ptr<BackendNode> >& nodes);

    /**
     * @brief Returns a Vulkan compute (VkCom) backend node.
     * @param[in]  inputs  layer input wrappers
     * @param[out] outputs layer output wrappers
     */
    virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs, std::vector<Ptr<BackendWrapper> > &outputs);

    /**
     * @brief Returns a WebNN backend node.
     * @param[in] inputs layer input wrappers
     * @param[in] nodes  backend nodes of the input layers
     */
    virtual Ptr<BackendNode> initWebnn(const std::vector<Ptr<BackendWrapper> > &inputs, const std::vector<Ptr<BackendNode> >& nodes);

    /**
     * @brief Returns a CUDA backend node
     *
     * @param   context void pointer to CSLContext object
     * @param   inputs  layer inputs
     * @param   outputs layer outputs
     */
    virtual Ptr<BackendNode> initCUDA(
        void *context,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    );

    /**
     * @brief Returns a TimVX backend node
     *
     * @param   timVxInfo      void pointer to CSLContext object
     * @param   inputsWrapper  layer inputs
     * @param   outputsWrapper layer outputs
     * @param   isLast         if the node is the last one of the TimVX Graph.
     */
    virtual Ptr<BackendNode> initTimVX(void* timVxInfo,
                                       const std::vector<Ptr<BackendWrapper> > &inputsWrapper,
                                       const std::vector<Ptr<BackendWrapper> > &outputsWrapper,
                                       bool isLast);

    /**
     * @brief Returns a CANN backend node
     *
     * @param   inputs  input tensors of CANN operator
     * @param   outputs output tensors of CANN operator
     * @param   nodes   nodes of input tensors
     */
    virtual Ptr<BackendNode> initCann(const std::vector<Ptr<BackendWrapper> > &inputs,
                                      const std::vector<Ptr<BackendWrapper> > &outputs,
                                      const std::vector<Ptr<BackendNode> >& nodes);

    /**
     * @brief Automatic Halide scheduling based on layer hyper-parameters.
     * @param[in] node Backend node with Halide functions.
     * @param[in] inputs Blobs that will be used in forward invocations.
     * @param[in] outputs Blobs that will be used in forward invocations.
     * @param[in] targetId Target identifier
     * @see BackendNode, Target
     *
     * Layers don't use their own Halide::Func members because layer fusing may
     * have been applied; in that case the fused function should be scheduled.
     */
    virtual void applyHalideScheduler(Ptr<BackendNode>& node,
                                      const std::vector<Mat*> &inputs,
                                      const std::vector<Mat> &outputs,
                                      int targetId) const;

    /**
     * @brief Implement layers fusing.
     * @param[in] node Backend node of bottom layer.
     * @see BackendNode
     *
     * Actual for graph-based backends. If the layer attached successfully,
     * returns a non-empty cv::Ptr to a node of the same backend.
     * Fuse only over the last function.
     */
    virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node);

    /**
     * @brief Tries to attach to the layer the subsequent activation layer, i.e. do the layer fusion in a partial case.
     * @param[in] layer The subsequent activation layer.
     *
     * Returns true if the activation layer has been attached successfully.
     */
    virtual bool setActivation(const Ptr<ActivationLayer>& layer);

    /**
     * @brief Try to fuse current layer with a next one
     * @param[in] top Next layer to be fused.
     * @returns True if fusion was performed.
     */
    virtual bool tryFuse(Ptr<Layer>& top);

    /**
     * @brief Returns parameters of layers with channel-wise multiplication and addition.
     * @param[out] scale Channel-wise multipliers. Total number of values should
     *                   be equal to number of channels.
     * @param[out] shift Channel-wise offsets. Total number of values should
     *                   be equal to number of channels.
     *
     * Some layers can fuse their transformations with further layers.
     * For example, convolution + batch normalization: this way the base layer
     * uses weights from the layer after it, and the fused layer is skipped.
     * By default, @p scale and @p shift are empty, which means the layer has no
     * element-wise multiplications or additions.
     */
    virtual void getScaleShift(Mat& scale, Mat& shift) const;

    /**
     * @brief Returns scale and zeropoint of layers
     * @param[out] scale Output scale
     * @param[out] zeropoint Output zeropoint
     *
     * By default, @p scale is 1 and @p zeropoint is 0.
     */
    virtual void getScaleZeropoint(float& scale, int& zeropoint) const;


    /**
     * @brief "Detaches" all the layers, attached to particular layer.
     */
    virtual void unsetAttached();

    /**
     * @brief Computes shapes of output and internal blobs from the given input shapes.
     * @param[in]  inputs          shapes of the input blobs
     * @param[in]  requiredOutputs required number of output blobs
     * @param[out] outputs         computed shapes of the output blobs
     * @param[out] internals       computed shapes of internally used blobs
     * @returns NOTE(review): return flag semantics are implementation-specific
     *          (commonly whether in-place computation is possible) — confirm
     *          against concrete layer implementations.
     */
    virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
                                 const int requiredOutputs,
                                 std::vector<MatShape> &outputs,
                                 std::vector<MatShape> &internals) const;

    /**
     * @brief Estimates the number of floating-point operations for the given shapes.
     * The base implementation returns 0; layer implementations may override it
     * with a real estimate.
     */
    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const {CV_UNUSED(inputs); CV_UNUSED(outputs); return 0;}

    /**
     * @brief Updates the layer's internal shape state for new input shapes.
     * @param[in] inputs shapes of the input blobs
     * NOTE(review): meaning of the boolean result is implementation-specific — confirm.
     */
    virtual bool updateMemoryShapes(const std::vector<MatShape> &inputs);

    CV_PROP String name; //!< Name of the layer instance, can be used for logging or other internal purposes.
    CV_PROP String type; //!< Type name which was used for creating layer by layer factory.
    CV_PROP int preferableTarget; //!< preferred target for layer forwarding

    Layer();
    explicit Layer(const LayerParams &params); //!< Initializes only #name, #type and #blobs fields.
    void setParamsFrom(const LayerParams &params); //!< Initializes only #name, #type and #blobs fields.
    virtual ~Layer();
};
| 463 | |
| 464 | /** @brief This class allows to create and manipulate comprehensive artificial neural networks. |
| 465 | * |
| 466 | * Neural network is presented as directed acyclic graph (DAG), where vertices are Layer instances, |
| 467 | * and edges specify relationships between layers inputs and outputs. |
| 468 | * |
| 469 | * Each network layer has unique integer id and unique string name inside its network. |
| 470 | * LayerId can store either layer name or layer id. |
| 471 | * |
| 472 | * This class supports reference counting of its instances, i. e. copies point to the same instance. |
| 473 | */ |
| 474 | class CV_EXPORTS_W_SIMPLE Net |
| 475 | { |
| 476 | public: |
| 477 | |
| 478 | CV_WRAP Net(); //!< Default constructor. |
| 479 | CV_WRAP ~Net(); //!< Destructor frees the net only if there aren't references to the net anymore. |
| 480 | |
| 481 | /** @brief Create a network from Intel's Model Optimizer intermediate representation (IR). |
| 482 | * @param[in] xml XML configuration file with network's topology. |
| 483 | * @param[in] bin Binary file with trained weights. |
| 484 | * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine |
| 485 | * backend. |
| 486 | */ |
| 487 | CV_WRAP static Net readFromModelOptimizer(CV_WRAP_FILE_PATH const String& xml, CV_WRAP_FILE_PATH const String& bin); |
| 488 | |
| 489 | /** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR). |
| 490 | * @param[in] bufferModelConfig buffer with model's configuration. |
| 491 | * @param[in] bufferWeights buffer with model's trained weights. |
| 492 | * @returns Net object. |
| 493 | */ |
| 494 | CV_WRAP static |
| 495 | Net readFromModelOptimizer(const std::vector<uchar>& bufferModelConfig, const std::vector<uchar>& bufferWeights); |
| 496 | |
| 497 | /** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR). |
| 498 | * @param[in] bufferModelConfigPtr buffer pointer of model's configuration. |
| 499 | * @param[in] bufferModelConfigSize buffer size of model's configuration. |
| 500 | * @param[in] bufferWeightsPtr buffer pointer of model's trained weights. |
| 501 | * @param[in] bufferWeightsSize buffer size of model's trained weights. |
| 502 | * @returns Net object. |
| 503 | */ |
| 504 | static |
| 505 | Net readFromModelOptimizer(const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, |
| 506 | const uchar* bufferWeightsPtr, size_t bufferWeightsSize); |
| 507 | |
| 508 | /** Returns true if there are no layers in the network. */ |
| 509 | CV_WRAP bool empty() const; |
| 510 | |
| 511 | /** @brief Dump net to String |
| 512 | * @returns String with structure, hyperparameters, backend, target and fusion |
| 513 | * Call method after setInput(). To see correct backend, target and fusion run after forward(). |
| 514 | */ |
| 515 | CV_WRAP String dump(); |
| 516 | /** @brief Dump net structure, hyperparameters, backend, target and fusion to dot file |
| 517 | * @param path path to output file with .dot extension |
| 518 | * @see dump() |
| 519 | */ |
| 520 | CV_WRAP void dumpToFile(CV_WRAP_FILE_PATH const String& path); |
| 521 | /** @brief Dump net structure, hyperparameters, backend, target and fusion to pbtxt file |
| 522 | * @param path path to output file with .pbtxt extension |
| 523 | * |
| 524 | * Use Netron (https://netron.app) to open the target file to visualize the model. |
| 525 | * Call method after setInput(). To see correct backend, target and fusion run after forward(). |
| 526 | */ |
| 527 | CV_WRAP void dumpToPbtxt(CV_WRAP_FILE_PATH const String& path); |
| 528 | |
| 529 | /** @brief Adds new layer to the net. |
| 530 | * @param name unique name of the adding layer. |
| 531 | * @param type typename of the adding layer (type must be registered in LayerRegister). |
| 532 | * @param dtype datatype of output blobs. |
| 533 | * @param params parameters which will be used to initialize the creating layer. |
| 534 | * @returns unique identifier of created layer, or -1 if a failure will happen. |
| 535 | */ |
| 536 | CV_WRAP int addLayer(const String &name, const String &type, const int &dtype, LayerParams ¶ms); |
| 537 | |
| 538 | /** @overload Datatype of output blobs set to default CV_32F */ |
| 539 | int addLayer(const String &name, const String &type, LayerParams ¶ms); |
| 540 | |
| 541 | /** @brief Adds new layer and connects its first input to the first output of previously added layer. |
| 542 | * @see addLayer() |
| 543 | */ |
| 544 | CV_WRAP int addLayerToPrev(const String &name, const String &type, const int &dtype, LayerParams ¶ms); |
| 545 | |
| 546 | /** @overload */ |
| 547 | int addLayerToPrev(const String &name, const String &type, LayerParams ¶ms); |
| 548 | |
| 549 | /** @brief Converts string name of the layer to the integer identifier. |
| 550 | * @returns id of the layer, or -1 if the layer wasn't found. |
| 551 | */ |
| 552 | CV_WRAP int getLayerId(const String &layer) const; |
| 553 | |
| 554 | CV_WRAP std::vector<String> getLayerNames() const; |
| 555 | |
| 556 | /** @brief Container for strings and integers. |
| 557 | * |
| 558 | * @deprecated Use getLayerId() with int result. |
| 559 | */ |
| 560 | typedef DictValue LayerId; |
| 561 | |
| 562 | /** @brief Returns pointer to layer with specified id or name which the network use. */ |
| 563 | CV_WRAP Ptr<Layer> getLayer(int layerId) const; |
| 564 | /** @overload |
| 565 | * @deprecated Use int getLayerId(const String &layer) |
| 566 | */ |
| 567 | CV_WRAP inline Ptr<Layer> getLayer(const String& layerName) const { return getLayer(layerId: getLayerId(layer: layerName)); } |
| 568 | /** @overload |
| 569 | * @deprecated to be removed |
| 570 | */ |
| 571 | CV_WRAP Ptr<Layer> getLayer(const LayerId& layerId) const; |
| 572 | |
| 573 | /** @brief Returns pointers to input layers of specific layer. */ |
| 574 | std::vector<Ptr<Layer> > getLayerInputs(int layerId) const; // FIXIT: CV_WRAP |
| 575 | |
| 576 | /** @brief Connects output of the first layer to input of the second layer. |
| 577 | * @param outPin descriptor of the first layer output. |
| 578 | * @param inpPin descriptor of the second layer input. |
| 579 | * |
| 580 | * Descriptors have the following template <DFN><layer_name>[.input_number]</DFN>: |
| 581 | * - the first part of the template <DFN>layer_name</DFN> is string name of the added layer. |
| 582 | * If this part is empty then the network input pseudo layer will be used; |
| 583 | * - the second optional part of the template <DFN>input_number</DFN> |
| 584 | * is either number of the layer input, either label one. |
| 585 | * If this part is omitted then the first layer input will be used. |
| 586 | * |
| 587 | * @see setNetInputs(), Layer::inputNameToIndex(), Layer::outputNameToIndex() |
| 588 | */ |
| 589 | CV_WRAP void connect(String outPin, String inpPin); |
| 590 | |
| 591 | /** @brief Connects #@p outNum output of the first layer to #@p inNum input of the second layer. |
| 592 | * @param outLayerId identifier of the first layer |
| 593 | * @param outNum number of the first layer output |
| 594 | * @param inpLayerId identifier of the second layer |
| 595 | * @param inpNum number of the second layer input |
| 596 | */ |
| 597 | void connect(int outLayerId, int outNum, int inpLayerId, int inpNum); |
| 598 | |
| 599 | /** @brief Registers network output with name |
| 600 | * |
| 601 | * Function may create additional 'Identity' layer. |
| 602 | * |
| 603 | * @param outputName identifier of the output |
| 604 | * @param layerId identifier of the second layer |
| 605 | * @param outputPort number of the second layer input |
| 606 | * |
| 607 | * @returns index of bound layer (the same as layerId or newly created) |
| 608 | */ |
| 609 | CV_WRAP int registerOutput(const std::string& outputName, int layerId, int outputPort); |
| 610 | |
| 611 | /** @brief Sets outputs names of the network input pseudo layer. |
| 612 | * |
| 613 | * Each net always has special own the network input pseudo layer with id=0. |
| 614 | * This layer stores the user blobs only and don't make any computations. |
| 615 | * In fact, this layer provides the only way to pass user data into the network. |
| 616 | * As any other layer, this layer can label its outputs and this function provides an easy way to do this. |
| 617 | */ |
| 618 | CV_WRAP void setInputsNames(const std::vector<String> &inputBlobNames); |
| 619 | |
| 620 | /** @brief Specify shape of network input. |
| 621 | */ |
CV_WRAP void setInputShape(const String &inputName, const MatShape& shape);

/** @brief Runs forward pass to compute output of layer with name @p outputName.
 * @param outputName name of the layer whose output is needed
 * @return blob for first output of specified layer.
 * @details By default runs forward pass for the whole network.
 */
CV_WRAP Mat forward(const String& outputName = String());

/** @brief Runs forward pass to compute output of layer with name @p outputName.
 * @param outputName name of the layer whose output is needed
 * @details By default runs forward pass for the whole network.
 *
 * This is an asynchronous version of forward(const String&).
 * dnn::DNN_BACKEND_INFERENCE_ENGINE backend is required.
 */
CV_WRAP AsyncArray forwardAsync(const String& outputName = String());

/** @brief Runs forward pass to compute output of layer with name @p outputName.
 * @param outputBlobs contains all output blobs for specified layer.
 * @param outputName name of the layer whose output is needed
 * @details If @p outputName is empty, runs forward pass for the whole network.
 */
CV_WRAP void forward(CV_ND OutputArrayOfArrays outputBlobs, const String& outputName = String());

/** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
 * @param outputBlobs contains blobs for first outputs of specified layers.
 * @param outBlobNames names of the layers whose outputs are needed
 */
CV_WRAP void forward(CV_ND OutputArrayOfArrays outputBlobs,
                     const std::vector<String>& outBlobNames);

/** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
 * @param outputBlobs contains all output blobs for each layer specified in @p outBlobNames.
 * @param outBlobNames names of the layers whose outputs are needed
 */
CV_WRAP_AS(forwardAndRetrieve) void forward(CV_OUT std::vector<std::vector<Mat> >& outputBlobs,
                                            const std::vector<String>& outBlobNames);
| 660 | |
/** @brief Returns a quantized Net from a floating-point Net.
 * @param calibData Calibration data to compute the quantization parameters.
 * @param inputsDtype Datatype of quantized net's inputs. Can be CV_32F or CV_8S.
 * @param outputsDtype Datatype of quantized net's outputs. Can be CV_32F or CV_8S.
 * @param perChannel Quantization granularity of quantized Net. The default is true, that means quantize model
 * in per-channel way (channel-wise). Set it false to quantize model in per-tensor way (or tensor-wise).
 * @returns Quantized Net.
 */
CV_WRAP Net quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype, bool perChannel=true);

/** @brief Returns input scale and zeropoint for a quantized Net.
 * @param scales output parameter for returning input scales.
 * @param zeropoints output parameter for returning input zeropoints.
 */
CV_WRAP void getInputDetails(CV_OUT std::vector<float>& scales, CV_OUT std::vector<int>& zeropoints) const;

/** @brief Returns output scale and zeropoint for a quantized Net.
 * @param scales output parameter for returning output scales.
 * @param zeropoints output parameter for returning output zeropoints.
 */
CV_WRAP void getOutputDetails(CV_OUT std::vector<float>& scales, CV_OUT std::vector<int>& zeropoints) const;

/**
 * @brief Compile Halide layers.
 * @param[in] scheduler Path to YAML file with scheduling directives.
 * @see setPreferableBackend
 *
 * Schedule layers that support Halide backend. Then compile them for
 * specific target. For layers that are not represented in the scheduling file,
 * or if no manual scheduling is used at all, automatic scheduling will be applied.
 */
CV_WRAP void setHalideScheduler(const String& scheduler);

/**
 * @brief Ask network to use specific computation backend where it supported.
 * @param[in] backendId backend identifier.
 * @see Backend
 */
CV_WRAP void setPreferableBackend(int backendId);

/**
 * @brief Ask network to make computations on specific target device.
 * @param[in] targetId target identifier.
 * @see Target
 *
 * List of supported combinations backend / target:
 * |                        | DNN_BACKEND_OPENCV | DNN_BACKEND_INFERENCE_ENGINE | DNN_BACKEND_HALIDE | DNN_BACKEND_CUDA |
 * |------------------------|--------------------|------------------------------|--------------------|------------------|
 * | DNN_TARGET_CPU         | +                  | +                            | +                  |                  |
 * | DNN_TARGET_OPENCL      | +                  | +                            | +                  |                  |
 * | DNN_TARGET_OPENCL_FP16 | +                  | +                            |                    |                  |
 * | DNN_TARGET_MYRIAD      |                    | +                            |                    |                  |
 * | DNN_TARGET_FPGA        |                    | +                            |                    |                  |
 * | DNN_TARGET_CUDA        |                    |                              |                    | +                |
 * | DNN_TARGET_CUDA_FP16   |                    |                              |                    | +                |
 * | DNN_TARGET_HDDL        |                    | +                            |                    |                  |
 */
CV_WRAP void setPreferableTarget(int targetId);

/** @brief Sets the new input value for the network
 * @param blob        A new blob. Should have CV_32F or CV_8U depth.
 * @param name        A name of input layer.
 * @param scalefactor An optional normalization scale.
 * @param mean        Optional mean subtraction values.
 * @see connect(String, String) to know format of the descriptor.
 *
 * If scale or mean values are specified, a final input blob is computed
 * as:
 * \f[input(n,c,h,w)  = scalefactor \times (blob(n,c,h,w) - mean_c)\f]
 */
CV_WRAP void setInput(CV_ND InputArray blob, const String& name = "",
                      double scalefactor = 1.0, const Scalar& mean = Scalar());
| 732 | |
| 733 | /** @brief Sets the new value for the learned param of the layer. |
| 734 | * @param layer name or id of the layer. |
| 735 | * @param numParam index of the layer parameter in the Layer::blobs array. |
| 736 | * @param blob the new value. |
| 737 | * @see Layer::blobs |
| 738 | * @note If shape of the new blob differs from the previous shape, |
| 739 | * then the following forward pass may fail. |
| 740 | */ |
| 741 | CV_WRAP void setParam(int layer, int numParam, CV_ND const Mat &blob); |
| 742 | CV_WRAP inline void setParam(const String& layerName, int numParam, CV_ND const Mat &blob) { return setParam(layer: getLayerId(layer: layerName), numParam, blob); } |
| 743 | |
| 744 | /** @brief Returns parameter blob of the layer. |
| 745 | * @param layer name or id of the layer. |
| 746 | * @param numParam index of the layer parameter in the Layer::blobs array. |
| 747 | * @see Layer::blobs |
| 748 | */ |
| 749 | CV_WRAP Mat getParam(int layer, int numParam = 0) const; |
| 750 | CV_WRAP inline Mat getParam(const String& layerName, int numParam = 0) const { return getParam(layer: getLayerId(layer: layerName), numParam); } |
| 751 | |
/** @brief Returns indexes of layers with unconnected outputs.
 * @returns vector of layer ids.
 *
 * FIXIT: Rework API to registerOutput() approach, deprecate this call
 */
CV_WRAP std::vector<int> getUnconnectedOutLayers() const;

/** @brief Returns names of layers with unconnected outputs.
 * @returns vector of layer names.
 *
 * FIXIT: Rework API to registerOutput() approach, deprecate this call
 */
CV_WRAP std::vector<String> getUnconnectedOutLayersNames() const;
| 763 | |
| 764 | /** @brief Returns input and output shapes for all layers in loaded model; |
| 765 | * preliminary inferencing isn't necessary. |
| 766 | * @param netInputShapes shapes for all input blobs in net input layer. |
| 767 | * @param layersIds output parameter for layer IDs. |
| 768 | * @param inLayersShapes output parameter for input layers shapes; |
| 769 | * order is the same as in layersIds |
| 770 | * @param outLayersShapes output parameter for output layers shapes; |
| 771 | * order is the same as in layersIds |
| 772 | */ |
| 773 | CV_WRAP void (const std::vector<MatShape>& netInputShapes, |
| 774 | CV_OUT std::vector<int>& layersIds, |
| 775 | CV_OUT std::vector<std::vector<MatShape> >& , |
| 776 | CV_OUT std::vector<std::vector<MatShape> >& ) const; |
| 777 | |
| 778 | /** @overload */ |
| 779 | CV_WRAP void (const MatShape& netInputShape, |
| 780 | CV_OUT std::vector<int>& layersIds, |
| 781 | CV_OUT std::vector<std::vector<MatShape> >& , |
| 782 | CV_OUT std::vector<std::vector<MatShape> >& ) const; |
| 783 | |
/** @brief Returns input and output shapes for layer with specified
 * id in loaded model; preliminary inferencing isn't necessary.
 * @param netInputShape shape input blob in net input layer.
 * @param layerId id for layer.
 * @param inLayerShapes output parameter for input layers shapes;
 * order is the same as the order of the layer's input blobs
 * @param outLayerShapes output parameter for output layers shapes;
 * order is the same as the order of the layer's output blobs
 */
void getLayerShapes(const MatShape& netInputShape,
                    const int layerId,
                    CV_OUT std::vector<MatShape>& inLayerShapes,
                    CV_OUT std::vector<MatShape>& outLayerShapes) const; // FIXIT: CV_WRAP

/** @overload */
void getLayerShapes(const std::vector<MatShape>& netInputShapes,
                    const int layerId,
                    CV_OUT std::vector<MatShape>& inLayerShapes,
                    CV_OUT std::vector<MatShape>& outLayerShapes) const; // FIXIT: CV_WRAP

/** @brief Computes FLOP for whole loaded model with specified input shapes.
 * @param netInputShapes vector of shapes for all net inputs.
 * @returns computed FLOP.
 */
CV_WRAP int64 getFLOPS(const std::vector<MatShape>& netInputShapes) const;
/** @overload */
CV_WRAP int64 getFLOPS(const MatShape& netInputShape) const;
/** @overload */
CV_WRAP int64 getFLOPS(const int layerId,
                       const std::vector<MatShape>& netInputShapes) const;
/** @overload */
CV_WRAP int64 getFLOPS(const int layerId,
                       const MatShape& netInputShape) const;

/** @brief Returns list of types for layers used in model.
 * @param layersTypes output parameter for returning types.
 */
CV_WRAP void getLayerTypes(CV_OUT std::vector<String>& layersTypes) const;

/** @brief Returns count of layers of specified type.
 * @param layerType type.
 * @returns count of layers
 */
CV_WRAP int getLayersCount(const String& layerType) const;
| 828 | |
/** @brief Computes the number of bytes required to store
 * all weights and intermediate blobs for the model.
 * @param netInputShapes vector of shapes for all net inputs.
 * @param weights output parameter to store resulting bytes for weights.
 * @param blobs output parameter to store resulting bytes for intermediate blobs.
 */
void getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
                          CV_OUT size_t& weights, CV_OUT size_t& blobs) const; // FIXIT: CV_WRAP
/** @overload */
CV_WRAP void getMemoryConsumption(const MatShape& netInputShape,
                                  CV_OUT size_t& weights, CV_OUT size_t& blobs) const;
/** @overload */
CV_WRAP void getMemoryConsumption(const int layerId,
                                  const std::vector<MatShape>& netInputShapes,
                                  CV_OUT size_t& weights, CV_OUT size_t& blobs) const;
/** @overload */
CV_WRAP void getMemoryConsumption(const int layerId,
                                  const MatShape& netInputShape,
                                  CV_OUT size_t& weights, CV_OUT size_t& blobs) const;

/** @brief Computes the number of bytes required to store
 * all weights and intermediate blobs for each layer.
 * @param netInputShapes vector of shapes for all net inputs.
 * @param layerIds output vector to save layer IDs.
 * @param weights output parameter to store resulting bytes for weights.
 * @param blobs output parameter to store resulting bytes for intermediate blobs.
 */
void getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
                          CV_OUT std::vector<int>& layerIds,
                          CV_OUT std::vector<size_t>& weights,
                          CV_OUT std::vector<size_t>& blobs) const; // FIXIT: CV_WRAP
/** @overload */
void getMemoryConsumption(const MatShape& netInputShape,
                          CV_OUT std::vector<int>& layerIds,
                          CV_OUT std::vector<size_t>& weights,
                          CV_OUT std::vector<size_t>& blobs) const; // FIXIT: CV_WRAP
| 865 | |
/** @brief Enables or disables layer fusion in the network.
 * @param fusion true to enable the fusion, false to disable. The fusion is enabled by default.
 */
CV_WRAP void enableFusion(bool fusion);

/** @brief Enables or disables the Winograd compute branch. The Winograd compute branch can speed up
 * 3x3 Convolution at a small loss of accuracy.
 * @param useWinograd true to enable the Winograd compute branch. The default is true.
 */
CV_WRAP void enableWinograd(bool useWinograd);

/** @brief Returns overall time for inference and timings (in ticks) for layers.
 *
 * Indexes in returned vector correspond to layers ids. Some layers can be fused with others,
 * in this case zero ticks count will be returned for those skipped layers. Supported by DNN_BACKEND_OPENCV on DNN_TARGET_CPU only.
 *
 * @param[out] timings vector for tick timings for all layers.
 * @return overall ticks for model inference.
 */
CV_WRAP int64 getPerfProfile(CV_OUT std::vector<double>& timings);


// Implementation details are hidden behind an opaque Impl type (PImpl idiom).
struct Impl;
inline Impl* getImpl() const { return impl.get(); }  // non-owning pointer to the implementation
inline Impl& getImplRef() const { CV_DbgAssert(impl); return *impl.get(); }  // debug-asserts impl is non-null before dereference
friend class accessor::DnnNetAccessor;  // internal helper granted access to Impl
protected:
Ptr<Impl> impl;  // smart pointer to the implementation object
| 894 | }; |
| 895 | |
/** @brief Reads a network model stored in <a href="https://pjreddie.com/darknet/">Darknet</a> model files.
 * @param cfgFile path to the .cfg file with text description of the network architecture.
 * @param darknetModel path to the .weights file with learned network.
 * @returns Network object that is ready to do forward, throws an exception in failure cases.
 */
CV_EXPORTS_W Net readNetFromDarknet(CV_WRAP_FILE_PATH const String &cfgFile, CV_WRAP_FILE_PATH const String &darknetModel = String());

/** @brief Reads a network model stored in <a href="https://pjreddie.com/darknet/">Darknet</a> model files.
 * @param bufferCfg A buffer contains a content of .cfg file with text description of the network architecture.
 * @param bufferModel A buffer contains a content of .weights file with learned network.
 * @returns Net object.
 */
CV_EXPORTS_W Net readNetFromDarknet(const std::vector<uchar>& bufferCfg,
                                    const std::vector<uchar>& bufferModel = std::vector<uchar>());

/** @brief Reads a network model stored in <a href="https://pjreddie.com/darknet/">Darknet</a> model files.
 * @param bufferCfg A buffer contains a content of .cfg file with text description of the network architecture.
 * @param lenCfg Number of bytes to read from bufferCfg
 * @param bufferModel A buffer contains a content of .weights file with learned network.
 * @param lenModel Number of bytes to read from bufferModel
 * @returns Net object.
 */
CV_EXPORTS Net readNetFromDarknet(const char *bufferCfg, size_t lenCfg,
                                  const char *bufferModel = NULL, size_t lenModel = 0);

/** @brief Reads a network model stored in <a href="http://caffe.berkeleyvision.org">Caffe</a> framework's format.
 * @param prototxt path to the .prototxt file with text description of the network architecture.
 * @param caffeModel path to the .caffemodel file with learned network.
 * @returns Net object.
 */
CV_EXPORTS_W Net readNetFromCaffe(CV_WRAP_FILE_PATH const String &prototxt, CV_WRAP_FILE_PATH const String &caffeModel = String());

/** @brief Reads a network model stored in Caffe model in memory.
 * @param bufferProto buffer containing the content of the .prototxt file
 * @param bufferModel buffer containing the content of the .caffemodel file
 * @returns Net object.
 */
CV_EXPORTS_W Net readNetFromCaffe(const std::vector<uchar>& bufferProto,
                                  const std::vector<uchar>& bufferModel = std::vector<uchar>());

/** @brief Reads a network model stored in Caffe model in memory.
 * @details This is an overloaded member function, provided for convenience.
 * It differs from the above function only in what argument(s) it accepts.
 * @param bufferProto buffer containing the content of the .prototxt file
 * @param lenProto length of bufferProto
 * @param bufferModel buffer containing the content of the .caffemodel file
 * @param lenModel length of bufferModel
 * @returns Net object.
 */
CV_EXPORTS Net readNetFromCaffe(const char *bufferProto, size_t lenProto,
                                const char *bufferModel = NULL, size_t lenModel = 0);
| 947 | |
/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
 * @param model path to the .pb file with binary protobuf description of the network architecture
 * @param config path to the .pbtxt file that contains text graph definition in protobuf format.
 * Resulting Net object is built by text graph using weights from a binary one that
 * let us make it more flexible.
 * @returns Net object.
 */
CV_EXPORTS_W Net readNetFromTensorflow(CV_WRAP_FILE_PATH const String &model, CV_WRAP_FILE_PATH const String &config = String());

/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
 * @param bufferModel buffer containing the content of the pb file
 * @param bufferConfig buffer containing the content of the pbtxt file
 * @returns Net object.
 */
CV_EXPORTS_W Net readNetFromTensorflow(const std::vector<uchar>& bufferModel,
                                       const std::vector<uchar>& bufferConfig = std::vector<uchar>());

/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
 * @details This is an overloaded member function, provided for convenience.
 * It differs from the above function only in what argument(s) it accepts.
 * @param bufferModel buffer containing the content of the pb file
 * @param lenModel length of bufferModel
 * @param bufferConfig buffer containing the content of the pbtxt file
 * @param lenConfig length of bufferConfig
 * @returns Net object.
 */
CV_EXPORTS Net readNetFromTensorflow(const char *bufferModel, size_t lenModel,
                                     const char *bufferConfig = NULL, size_t lenConfig = 0);

/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/lite">TFLite</a> framework's format.
 * @param model path to the .tflite file with binary flatbuffers description of the network architecture
 * @returns Net object.
 */
CV_EXPORTS_W Net readNetFromTFLite(CV_WRAP_FILE_PATH const String &model);

/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/lite">TFLite</a> framework's format.
 * @param bufferModel buffer containing the content of the tflite file
 * @returns Net object.
 */
CV_EXPORTS_W Net readNetFromTFLite(const std::vector<uchar>& bufferModel);

/** @brief Reads a network model stored in <a href="https://www.tensorflow.org/lite">TFLite</a> framework's format.
 * @details This is an overloaded member function, provided for convenience.
 * It differs from the above function only in what argument(s) it accepts.
 * @param bufferModel buffer containing the content of the tflite file
 * @param lenModel length of bufferModel
 * @returns Net object.
 */
CV_EXPORTS Net readNetFromTFLite(const char *bufferModel, size_t lenModel);
| 995 | |
| 996 | /** |
| 997 | * @brief Reads a network model stored in <a href="http://torch.ch">Torch7</a> framework's format. |
| 998 | * @param model path to the file, dumped from Torch by using torch.save() function. |
| 999 | * @param isBinary specifies whether the network was serialized in ascii mode or binary. |
| 1000 | * @param evaluate specifies testing phase of network. If true, it's similar to evaluate() method in Torch. |
| 1001 | * @returns Net object. |
| 1002 | * |
| 1003 | * @note Ascii mode of Torch serializer is more preferable, because binary mode extensively use `long` type of C language, |
| 1004 | * which has various bit-length on different systems. |
| 1005 | * |
| 1006 | * The loading file must contain serialized <a href="https://github.com/torch/nn/blob/master/doc/module.md">nn.Module</a> object |
| 1007 | * with importing network. Try to eliminate a custom objects from serialazing data to avoid importing errors. |
| 1008 | * |
| 1009 | * List of supported layers (i.e. object instances derived from Torch nn.Module class): |
| 1010 | * - nn.Sequential |
| 1011 | * - nn.Parallel |
| 1012 | * - nn.Concat |
| 1013 | * - nn.Linear |
| 1014 | * - nn.SpatialConvolution |
| 1015 | * - nn.SpatialMaxPooling, nn.SpatialAveragePooling |
| 1016 | * - nn.ReLU, nn.TanH, nn.Sigmoid |
| 1017 | * - nn.Reshape |
| 1018 | * - nn.SoftMax, nn.LogSoftMax |
| 1019 | * |
| 1020 | * Also some equivalents of these classes from cunn, cudnn, and fbcunn may be successfully imported. |
| 1021 | */ |
| 1022 | CV_EXPORTS_W Net readNetFromTorch(CV_WRAP_FILE_PATH const String &model, bool isBinary = true, bool evaluate = true); |
| 1023 | |
| 1024 | /** |
| 1025 | * @brief Read deep learning network represented in one of the supported formats. |
| 1026 | * @param[in] model Binary file contains trained weights. The following file |
| 1027 | * extensions are expected for models from different frameworks: |
| 1028 | * * `*.caffemodel` (Caffe, http://caffe.berkeleyvision.org/) |
| 1029 | * * `*.pb` (TensorFlow, https://www.tensorflow.org/) |
| 1030 | * * `*.t7` | `*.net` (Torch, http://torch.ch/) |
| 1031 | * * `*.weights` (Darknet, https://pjreddie.com/darknet/) |
| 1032 | * * `*.bin` | `*.onnx` (OpenVINO, https://software.intel.com/openvino-toolkit) |
| 1033 | * * `*.onnx` (ONNX, https://onnx.ai/) |
| 1034 | * @param[in] config Text file contains network configuration. It could be a |
| 1035 | * file with the following extensions: |
| 1036 | * * `*.prototxt` (Caffe, http://caffe.berkeleyvision.org/) |
| 1037 | * * `*.pbtxt` (TensorFlow, https://www.tensorflow.org/) |
| 1038 | * * `*.cfg` (Darknet, https://pjreddie.com/darknet/) |
| 1039 | * * `*.xml` (OpenVINO, https://software.intel.com/openvino-toolkit) |
| 1040 | * @param[in] framework Explicit framework name tag to determine a format. |
| 1041 | * @returns Net object. |
| 1042 | * |
| 1043 | * This function automatically detects an origin framework of trained model |
| 1044 | * and calls an appropriate function such @ref readNetFromCaffe, @ref readNetFromTensorflow, |
| 1045 | * @ref readNetFromTorch or @ref readNetFromDarknet. An order of @p model and @p config |
| 1046 | * arguments does not matter. |
| 1047 | */ |
| 1048 | CV_EXPORTS_W Net readNet(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "" , const String& framework = "" ); |
| 1049 | |
| 1050 | /** |
| 1051 | * @brief Read deep learning network represented in one of the supported formats. |
| 1052 | * @details This is an overloaded member function, provided for convenience. |
| 1053 | * It differs from the above function only in what argument(s) it accepts. |
| 1054 | * @param[in] framework Name of origin framework. |
| 1055 | * @param[in] bufferModel A buffer with a content of binary file with weights |
| 1056 | * @param[in] bufferConfig A buffer with a content of text file contains network configuration. |
| 1057 | * @returns Net object. |
| 1058 | */ |
| 1059 | CV_EXPORTS_W Net readNet(const String& framework, const std::vector<uchar>& bufferModel, |
| 1060 | const std::vector<uchar>& bufferConfig = std::vector<uchar>()); |
| 1061 | |
| 1062 | /** @brief Loads blob which was serialized as torch.Tensor object of Torch7 framework. |
| 1063 | * @warning This function has the same limitations as readNetFromTorch(). |
| 1064 | */ |
| 1065 | CV_EXPORTS_W Mat readTorchBlob(const String &filename, bool isBinary = true); |
| 1066 | |
| 1067 | /** @brief Load a network from Intel's Model Optimizer intermediate representation. |
| 1068 | * @param[in] xml XML configuration file with network's topology. |
| 1069 | * @param[in] bin Binary file with trained weights. |
| 1070 | * @returns Net object. |
| 1071 | * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine |
| 1072 | * backend. |
| 1073 | */ |
| 1074 | CV_EXPORTS_W |
| 1075 | Net readNetFromModelOptimizer(CV_WRAP_FILE_PATH const String &xml, CV_WRAP_FILE_PATH const String &bin = "" ); |
| 1076 | |
| 1077 | /** @brief Load a network from Intel's Model Optimizer intermediate representation. |
| 1078 | * @param[in] bufferModelConfig Buffer contains XML configuration with network's topology. |
| 1079 | * @param[in] bufferWeights Buffer contains binary data with trained weights. |
| 1080 | * @returns Net object. |
| 1081 | * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine |
| 1082 | * backend. |
| 1083 | */ |
| 1084 | CV_EXPORTS_W |
| 1085 | Net readNetFromModelOptimizer(const std::vector<uchar>& bufferModelConfig, const std::vector<uchar>& bufferWeights); |
| 1086 | |
| 1087 | /** @brief Load a network from Intel's Model Optimizer intermediate representation. |
| 1088 | * @param[in] bufferModelConfigPtr Pointer to buffer which contains XML configuration with network's topology. |
| 1089 | * @param[in] bufferModelConfigSize Binary size of XML configuration data. |
| 1090 | * @param[in] bufferWeightsPtr Pointer to buffer which contains binary data with trained weights. |
| 1091 | * @param[in] bufferWeightsSize Binary size of trained weights data. |
| 1092 | * @returns Net object. |
| 1093 | * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine |
| 1094 | * backend. |
| 1095 | */ |
| 1096 | CV_EXPORTS |
| 1097 | Net readNetFromModelOptimizer(const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, |
| 1098 | const uchar* bufferWeightsPtr, size_t bufferWeightsSize); |
| 1099 | |
/** @brief Reads a network model stored in <a href="https://onnx.ai/">ONNX</a> format.
 * @param onnxFile path to the .onnx file with text description of the network architecture.
 * @returns Network object that is ready to do forward, throws an exception in failure cases.
 */
CV_EXPORTS_W Net readNetFromONNX(CV_WRAP_FILE_PATH const String &onnxFile);

/** @brief Reads a network model from <a href="https://onnx.ai/">ONNX</a>
 * in-memory buffer.
 * @param buffer memory address of the first byte of the buffer.
 * @param sizeBuffer size of the buffer.
 * @returns Network object that is ready to do forward, throws an exception
 * in failure cases.
 */
CV_EXPORTS Net readNetFromONNX(const char* buffer, size_t sizeBuffer);

/** @brief Reads a network model from <a href="https://onnx.ai/">ONNX</a>
 * in-memory buffer.
 * @param buffer in-memory buffer that stores the ONNX model bytes.
 * @returns Network object that is ready to do forward, throws an exception
 * in failure cases.
 */
CV_EXPORTS_W Net readNetFromONNX(const std::vector<uchar>& buffer);

/** @brief Creates blob from .pb file.
 * @param path path to the .pb file with input tensor.
 * @returns Mat.
 */
CV_EXPORTS_W Mat readTensorFromONNX(CV_WRAP_FILE_PATH const String& path);
| 1128 | |
/** @brief Creates 4-dimensional blob from image. Optionally resizes and crops @p image from center,
 * subtract @p mean values, scales values by @p scalefactor, swap Blue and Red channels.
 * @param image input image (with 1-, 3- or 4-channels).
 * @param scalefactor multiplier for @p image values.
 * @param size spatial size for output image
 * @param mean scalar with mean values which are subtracted from channels. Values are intended
 * to be in (mean-R, mean-G, mean-B) order if @p image has BGR ordering and @p swapRB is true.
 * @param swapRB flag which indicates that swap first and last channels
 * in 3-channel image is necessary.
 * @param crop flag which indicates whether image will be cropped after resize or not
 * @param ddepth Depth of output blob. Choose CV_32F or CV_8U.
 * @details if @p crop is true, input image is resized so one side after resize is equal to corresponding
 * dimension in @p size and another one is equal or larger. Then, crop from the center is performed.
 * If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
 * @returns 4-dimensional Mat with NCHW dimensions order.
 *
 * @note
 * The order and usage of `scalefactor` and `mean` are (input - mean) * scalefactor.
 */
CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(),
                               const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
                               int ddepth=CV_32F);

/** @brief Creates 4-dimensional blob from image.
 * @details This is an overloaded member function, provided for convenience.
 * It differs from the above function only in what argument(s) it accepts.
 */
CV_EXPORTS void blobFromImage(InputArray image, OutputArray blob, double scalefactor=1.0,
                              const Size& size = Size(), const Scalar& mean = Scalar(),
                              bool swapRB=false, bool crop=false, int ddepth=CV_32F);


/** @brief Creates 4-dimensional blob from series of images. Optionally resizes and
 * crops @p images from center, subtract @p mean values, scales values by @p scalefactor,
 * swap Blue and Red channels.
 * @param images input images (all with 1-, 3- or 4-channels).
 * @param size spatial size for output image
 * @param mean scalar with mean values which are subtracted from channels. Values are intended
 * to be in (mean-R, mean-G, mean-B) order if @p images have BGR ordering and @p swapRB is true.
 * @param scalefactor multiplier for @p images values.
 * @param swapRB flag which indicates that swap first and last channels
 * in 3-channel image is necessary.
 * @param crop flag which indicates whether image will be cropped after resize or not
 * @param ddepth Depth of output blob. Choose CV_32F or CV_8U.
 * @details if @p crop is true, input image is resized so one side after resize is equal to corresponding
 * dimension in @p size and another one is equal or larger. Then, crop from the center is performed.
 * If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
 * @returns 4-dimensional Mat with NCHW dimensions order.
 *
 * @note
 * The order and usage of `scalefactor` and `mean` are (input - mean) * scalefactor.
 */
CV_EXPORTS_W Mat blobFromImages(InputArrayOfArrays images, double scalefactor=1.0,
                                Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
                                int ddepth=CV_32F);

/** @brief Creates 4-dimensional blob from series of images.
 * @details This is an overloaded member function, provided for convenience.
 * It differs from the above function only in what argument(s) it accepts.
 */
CV_EXPORTS void blobFromImages(InputArrayOfArrays images, OutputArray blob,
                               double scalefactor=1.0, Size size = Size(),
                               const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
                               int ddepth=CV_32F);
| 1193 | |
| 1194 | /** |
| 1195 | * @brief Enum of image processing mode. |
| 1196 | * To facilitate the specialization pre-processing requirements of the dnn model. |
| 1197 | * For example, the `letter box` often used in the Yolo series of models. |
| 1198 | * @see Image2BlobParams |
| 1199 | */ |
| 1200 | enum ImagePaddingMode |
| 1201 | { |
| 1202 | DNN_PMODE_NULL = 0, // !< Default. Resize to required input size without extra processing. |
| 1203 | DNN_PMODE_CROP_CENTER = 1, // !< Image will be cropped after resize. |
| 1204 | DNN_PMODE_LETTERBOX = 2, // !< Resize image to the desired size while preserving the aspect ratio of original image. |
| 1205 | }; |
| 1206 | |
/** @brief Processing params of image to blob.
 *
 * It includes all possible image processing operations and corresponding parameters.
 *
 * @see blobFromImageWithParams
 *
 * @note
 * The order and usage of `scalefactor` and `mean` are (input - mean) * scalefactor.
 * The order and usage of `scalefactor`, `size`, `mean`, `swapRB`, and `ddepth` are consistent
 * with the function of @ref blobFromImage.
 */
struct CV_EXPORTS_W_SIMPLE Image2BlobParams
{
    CV_WRAP Image2BlobParams();
    CV_WRAP Image2BlobParams(const Scalar& scalefactor, const Size& size = Size(), const Scalar& mean = Scalar(),
                             bool swapRB = false, int ddepth = CV_32F, dnn::DataLayout datalayout = DNN_LAYOUT_NCHW,
                             ImagePaddingMode mode = dnn::DNN_PMODE_NULL, Scalar borderValue = 0.0);

    CV_PROP_RW Scalar scalefactor; //!< scalefactor multiplier for input image values.
    CV_PROP_RW Size size; //!< Spatial size for output image.
    CV_PROP_RW Scalar mean; //!< Scalar with mean values which are subtracted from channels.
    CV_PROP_RW bool swapRB; //!< Flag which indicates that the first and last channels of a 3-channel image should be swapped.
    CV_PROP_RW int ddepth; //!< Depth of output blob. Choose CV_32F or CV_8U.
    CV_PROP_RW dnn::DataLayout datalayout; //!< Order of output dimensions. Choose DNN_LAYOUT_NCHW or DNN_LAYOUT_NHWC.
    CV_PROP_RW dnn::ImagePaddingMode paddingmode; //!< Image padding mode. @see ImagePaddingMode.
    CV_PROP_RW Scalar borderValue; //!< Value used in padding mode for padding.

    /** @brief Get rectangle coordinates in original image system from rectangle in blob coordinates.
     *  @param rBlob rect in blob coordinates.
     *  @param size original input image size.
     *  @returns rectangle in original image coordinates.
     */
    CV_WRAP Rect blobRectToImageRect(const Rect &rBlob, const Size &size);

    /** @brief Get rectangle coordinates in original image system from rectangles in blob coordinates.
     *  @param rBlob rects in blob coordinates.
     *  @param rImg result rects in original image coordinates.
     *  @param size original input image size.
     */
    CV_WRAP void blobRectsToImageRects(const std::vector<Rect> &rBlob, CV_OUT std::vector<Rect>& rImg, const Size& size);
};
| 1248 | |
/** @brief Creates 4-dimensional blob from image with given params.
 *
 * @details This function is an extension of @ref blobFromImage to meet more image preprocess needs.
 * Given the input image and preprocessing parameters, the function outputs the blob.
 *
 * @param image input image (with 1-, 3- or 4-channels).
 * @param param struct of Image2BlobParams, contains all parameters needed by processing of image to blob.
 * @return 4-dimensional Mat.
 */
CV_EXPORTS_W Mat blobFromImageWithParams(InputArray image, const Image2BlobParams& param = Image2BlobParams());

/** @overload */
CV_EXPORTS_W void blobFromImageWithParams(InputArray image, OutputArray blob, const Image2BlobParams& param = Image2BlobParams());
| 1262 | |
/** @brief Creates 4-dimensional blob from series of images with given params.
 *
 * @details This function is an extension of @ref blobFromImages to meet more image preprocess needs.
 * Given the input images and preprocessing parameters, the function outputs the blob.
 *
 * @param images input images (all with 1-, 3- or 4-channels).
 * @param param struct of Image2BlobParams, contains all parameters needed by processing of image to blob.
 * @returns 4-dimensional Mat.
 */
CV_EXPORTS_W Mat blobFromImagesWithParams(InputArrayOfArrays images, const Image2BlobParams& param = Image2BlobParams());

/** @overload */
CV_EXPORTS_W void blobFromImagesWithParams(InputArrayOfArrays images, OutputArray blob, const Image2BlobParams& param = Image2BlobParams());
| 1276 | |
/** @brief Parse a 4D blob and output the images it contains as 2D arrays through a simpler data structure
 *  (std::vector<cv::Mat>).
 *  @param[in] blob_ 4 dimensional array (images, channels, height, width) in floating point precision (CV_32F) from
 *  which you would like to extract the images.
 *  @param[out] images_ array of 2D Mat containing the images extracted from the blob in floating point precision
 *  (CV_32F). They are neither normalized nor is the mean added back. The number of returned images equals the first
 *  dimension of the blob (batch size). Every image has a number of channels equal to the second dimension of the
 *  blob (depth).
 */
CV_EXPORTS_W void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_);
| 1286 | |
/** @brief Convert all weights of Caffe network to half precision floating point.
 * @param src Path to origin model from Caffe framework contains single
 *            precision floating point weights (usually has `.caffemodel` extension).
 * @param dst Path to destination model with updated weights.
 * @param layersTypes Set of layers types which parameters will be converted.
 *                    By default, converts only Convolutional and Fully-Connected layers'
 *                    weights.
 *
 * @note The shrunk model has no original float32 weights, so it can't be used
 *       in the original Caffe framework anymore. However the structure of data
 *       is taken from NVidia's Caffe fork: https://github.com/NVIDIA/caffe.
 *       So the resulting model may be used there.
 */
CV_EXPORTS_W void shrinkCaffeModel(CV_WRAP_FILE_PATH const String& src, CV_WRAP_FILE_PATH const String& dst,
                                   const std::vector<String>& layersTypes = std::vector<String>());
| 1302 | |
/** @brief Create a text representation for a binary network stored in protocol buffer format.
 *  @param[in] model A path to binary network.
 *  @param[in] output A path to output text file to be created.
 *
 *  @note To reduce output file size, trained weights are not included.
 */
CV_EXPORTS_W void writeTextGraph(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& output);
| 1310 | |
/** @brief Performs non maximum suppression given boxes and corresponding scores.

 * @param bboxes a set of bounding boxes to apply NMS.
 * @param scores a set of corresponding confidences.
 * @param score_threshold a threshold used to filter boxes by score.
 * @param nms_threshold a threshold used in non maximum suppression.
 * @param indices the kept indices of bboxes after NMS.
 * @param eta a coefficient in adaptive threshold formula: \f$nms\_threshold_{i+1}=eta\cdot nms\_threshold_i\f$.
 * @param top_k if `>0`, keep at most @p top_k picked indices.
 */
CV_EXPORTS void NMSBoxes(const std::vector<Rect>& bboxes, const std::vector<float>& scores,
                         const float score_threshold, const float nms_threshold,
                         CV_OUT std::vector<int>& indices,
                         const float eta = 1.f, const int top_k = 0);

/** @overload Same as above, with double-precision boxes. */
CV_EXPORTS_W void NMSBoxes(const std::vector<Rect2d>& bboxes, const std::vector<float>& scores,
                           const float score_threshold, const float nms_threshold,
                           CV_OUT std::vector<int>& indices,
                           const float eta = 1.f, const int top_k = 0);

/** @overload Same as above, with rotated rectangles (exported to bindings as NMSBoxesRotated). */
CV_EXPORTS_AS(NMSBoxesRotated) void NMSBoxes(const std::vector<RotatedRect>& bboxes, const std::vector<float>& scores,
                             const float score_threshold, const float nms_threshold,
                             CV_OUT std::vector<int>& indices,
                             const float eta = 1.f, const int top_k = 0);
| 1335 | |
/** @brief Performs batched non maximum suppression on given boxes and corresponding scores across different classes.

 * @param bboxes a set of bounding boxes to apply NMS.
 * @param scores a set of corresponding confidences.
 * @param class_ids a set of corresponding class ids. Ids are integer and usually start from 0.
 * @param score_threshold a threshold used to filter boxes by score.
 * @param nms_threshold a threshold used in non maximum suppression.
 * @param indices the kept indices of bboxes after NMS.
 * @param eta a coefficient in adaptive threshold formula: \f$nms\_threshold_{i+1}=eta\cdot nms\_threshold_i\f$.
 * @param top_k if `>0`, keep at most @p top_k picked indices.
 */
CV_EXPORTS void NMSBoxesBatched(const std::vector<Rect>& bboxes, const std::vector<float>& scores, const std::vector<int>& class_ids,
                                const float score_threshold, const float nms_threshold,
                                CV_OUT std::vector<int>& indices,
                                const float eta = 1.f, const int top_k = 0);

/** @overload Same as above, with double-precision boxes. */
CV_EXPORTS_W void NMSBoxesBatched(const std::vector<Rect2d>& bboxes, const std::vector<float>& scores, const std::vector<int>& class_ids,
                                  const float score_threshold, const float nms_threshold,
                                  CV_OUT std::vector<int>& indices,
                                  const float eta = 1.f, const int top_k = 0);
| 1356 | |
| 1357 | /** |
| 1358 | * @brief Enum of Soft NMS methods. |
| 1359 | * @see softNMSBoxes |
| 1360 | */ |
| 1361 | enum class SoftNMSMethod |
| 1362 | { |
| 1363 | SOFTNMS_LINEAR = 1, |
| 1364 | SOFTNMS_GAUSSIAN = 2 |
| 1365 | }; |
| 1366 | |
/** @brief Performs soft non maximum suppression given boxes and corresponding scores.
 * Reference: https://arxiv.org/abs/1704.04503
 * @param bboxes a set of bounding boxes to apply Soft NMS.
 * @param scores a set of corresponding confidences.
 * @param updated_scores a set of corresponding updated confidences.
 * @param score_threshold a threshold used to filter boxes by score.
 * @param nms_threshold a threshold used in non maximum suppression.
 * @param indices the kept indices of bboxes after NMS.
 * @param top_k keep at most @p top_k picked indices.
 * @param sigma parameter of Gaussian weighting (used when @p method is SOFTNMS_GAUSSIAN).
 * @param method Gaussian or linear re-weighting method.
 * @see SoftNMSMethod
 */
CV_EXPORTS_W void softNMSBoxes(const std::vector<Rect>& bboxes,
                               const std::vector<float>& scores,
                               CV_OUT std::vector<float>& updated_scores,
                               const float score_threshold,
                               const float nms_threshold,
                               CV_OUT std::vector<int>& indices,
                               size_t top_k = 0,
                               const float sigma = 0.5,
                               SoftNMSMethod method = SoftNMSMethod::SOFTNMS_GAUSSIAN);
| 1390 | |
/** @brief This class represents a high-level API for neural networks.
 *
 * Model allows to set params for preprocessing input image.
 * Model creates net from file with trained weights and config,
 * sets preprocessing input and runs forward pass.
 */
class CV_EXPORTS_W_SIMPLE Model
{
public:
    CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to "protected" (need to fix bindings first)
    Model();

    Model(const Model&) = default;
    Model(Model&&) = default;
    Model& operator=(const Model&) = default;
    Model& operator=(Model&&) = default;

    /**
     * @brief Create model from deep learning network represented in one of the supported formats.
     * An order of @p model and @p config arguments does not matter.
     * @param[in] model Binary file contains trained weights.
     * @param[in] config Text file contains network configuration.
     */
    CV_WRAP Model(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "" );

    /**
     * @brief Create model from deep learning network.
     * @param[in] network Net object.
     */
    CV_WRAP Model(const Net& network);

    /** @brief Set input size for frame.
     *  @param[in] size New input size.
     *  @note If a dimension of the new size is less than 0, the frame size is not changed.
     */
    CV_WRAP Model& setInputSize(const Size& size);

    /** @overload
     *  @param[in] width New input width.
     *  @param[in] height New input height.
     */
    CV_WRAP inline
    Model& setInputSize(int width, int height) { return setInputSize(Size(width, height)); }

    /** @brief Set mean value for frame.
     *  @param[in] mean Scalar with mean values which are subtracted from channels.
     */
    CV_WRAP Model& setInputMean(const Scalar& mean);

    /** @brief Set scalefactor value for frame.
     *  @param[in] scale Multiplier for frame values.
     */
    CV_WRAP Model& setInputScale(const Scalar& scale);

    /** @brief Set flag crop for frame.
     *  @param[in] crop Flag which indicates whether image will be cropped after resize or not.
     */
    CV_WRAP Model& setInputCrop(bool crop);

    /** @brief Set flag swapRB for frame.
     *  @param[in] swapRB Flag which indicates that the first and last channels should be swapped.
     */
    CV_WRAP Model& setInputSwapRB(bool swapRB);

    /** @brief Set output names for frame.
     *  @param[in] outNames Names for output layers.
     */
    CV_WRAP Model& setOutputNames(const std::vector<String>& outNames);

    /** @brief Set preprocessing parameters for frame.
     *  @param[in] size New input size.
     *  @param[in] mean Scalar with mean values which are subtracted from channels.
     *  @param[in] scale Multiplier for frame values.
     *  @param[in] swapRB Flag which indicates that the first and last channels should be swapped.
     *  @param[in] crop Flag which indicates whether image will be cropped after resize or not.
     *  blob(n, c, y, x) = scale * (resize( frame(y, x, c) ) - mean(c))
     */
    CV_WRAP void setInputParams(double scale = 1.0, const Size& size = Size(),
                                const Scalar& mean = Scalar(), bool swapRB = false, bool crop = false);

    /** @brief Given the @p input frame, create input blob, run net and return the output @p blobs.
     *  @param[in] frame The input image.
     *  @param[out] outs Allocated output blobs, which will store results of the computation.
     */
    CV_WRAP void predict(InputArray frame, OutputArrayOfArrays outs) const;


    // ============================== Net proxy methods ==============================
    // Never expose methods with network implementation details, like:
    // - addLayer, addLayerToPrev, connect, setInputsNames, setInputShape, setParam, getParam
    // - getLayer*, getUnconnectedOutLayers, getUnconnectedOutLayersNames, getLayersShapes
    // - forward* methods, setInput

    /// @sa Net::setPreferableBackend
    CV_WRAP Model& setPreferableBackend(dnn::Backend backendId);
    /// @sa Net::setPreferableTarget
    CV_WRAP Model& setPreferableTarget(dnn::Target targetId);

    /// @sa Net::enableWinograd
    CV_WRAP Model& enableWinograd(bool useWinograd);

    CV_DEPRECATED_EXTERNAL
    operator Net&() const { return getNetwork_(); }

    //protected: - internal/tests usage only
    Net& getNetwork_() const;
    inline Net& getNetwork_() { return const_cast<const Model*>(this)->getNetwork_(); }

    struct Impl;
    inline Impl* getImpl() const { return impl.get(); }
    inline Impl& getImplRef() const { CV_DbgAssert(impl); return *impl.get(); }
protected:
    Ptr<Impl> impl;
};
| 1505 | |
/** @brief This class represents high-level API for classification models.
 *
 * ClassificationModel allows to set params for preprocessing input image.
 * ClassificationModel creates net from file with trained weights and config,
 * sets preprocessing input, runs forward pass and returns the top-1 prediction.
 */
class CV_EXPORTS_W_SIMPLE ClassificationModel : public Model
{
public:
    CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to "protected" (need to fix bindings first)
    ClassificationModel();

    /**
     * @brief Create classification model from network represented in one of the supported formats.
     * An order of @p model and @p config arguments does not matter.
     * @param[in] model Binary file contains trained weights.
     * @param[in] config Text file contains network configuration.
     */
    CV_WRAP ClassificationModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "" );

    /**
     * @brief Create model from deep learning network.
     * @param[in] network Net object.
     */
    CV_WRAP ClassificationModel(const Net& network);

    /**
     * @brief Set enable/disable softmax post processing option.
     *
     * If this option is true, softmax is applied after forward inference within the classify() function
     * to convert the confidences range to [0.0-1.0].
     * This function allows you to toggle this behavior.
     * Set it to true when the model does not contain a softmax layer.
     * @param[in] enable Set enable softmax post processing within the classify() function.
     */
    CV_WRAP ClassificationModel& setEnableSoftmaxPostProcessing(bool enable);

    /**
     * @brief Get enable/disable softmax post processing option.
     *
     * This option defaults to false; softmax post processing is not applied within the classify() function.
     */
    CV_WRAP bool getEnableSoftmaxPostProcessing() const;

    /** @brief Given the @p input frame, create input blob, run net and return top-1 prediction.
     *  @param[in] frame The input image.
     *  @returns a pair of the predicted class id and its confidence.
     */
    std::pair<int, float> classify(InputArray frame);

    /** @overload */
    CV_WRAP void classify(InputArray frame, CV_OUT int& classId, CV_OUT float& conf);
};
| 1558 | |
/** @brief This class represents high-level API for keypoints models
 *
 * KeypointsModel allows to set params for preprocessing input image.
 * KeypointsModel creates net from file with trained weights and config,
 * sets preprocessing input, runs forward pass and returns the x and y coordinates of each detected keypoint
 */
class CV_EXPORTS_W_SIMPLE KeypointsModel: public Model
{
public:
    /**
     * @brief Create keypoints model from network represented in one of the supported formats.
     * An order of @p model and @p config arguments does not matter.
     * @param[in] model Binary file contains trained weights.
     * @param[in] config Text file contains network configuration.
     */
    CV_WRAP KeypointsModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "" );

    /**
     * @brief Create model from deep learning network.
     * @param[in] network Net object.
     */
    CV_WRAP KeypointsModel(const Net& network);

    /** @brief Given the @p input frame, create input blob and run net.
     *  @param[in] frame The input image.
     *  @param thresh minimum confidence threshold to select a keypoint.
     *  @returns a vector holding the x and y coordinates of each detected keypoint
     */
    CV_WRAP std::vector<Point2f> estimate(InputArray frame, float thresh=0.5);
};
| 1590 | |
/** @brief This class represents high-level API for segmentation models
 *
 * SegmentationModel allows to set params for preprocessing input image.
 * SegmentationModel creates net from file with trained weights and config,
 * sets preprocessing input, runs forward pass and returns the class prediction for each pixel.
 */
class CV_EXPORTS_W_SIMPLE SegmentationModel: public Model
{
public:
    /**
     * @brief Create segmentation model from network represented in one of the supported formats.
     * An order of @p model and @p config arguments does not matter.
     * @param[in] model Binary file contains trained weights.
     * @param[in] config Text file contains network configuration.
     */
    CV_WRAP SegmentationModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "" );

    /**
     * @brief Create model from deep learning network.
     * @param[in] network Net object.
     */
    CV_WRAP SegmentationModel(const Net& network);

    /** @brief Given the @p input frame, create input blob and run net.
     *  @param[in] frame The input image.
     *  @param[out] mask Allocated class prediction for each pixel.
     */
    CV_WRAP void segment(InputArray frame, OutputArray mask);
};
| 1620 | |
/** @brief This class represents high-level API for object detection networks.
 *
 * DetectionModel allows to set params for preprocessing input image.
 * DetectionModel creates net from file with trained weights and config,
 * sets preprocessing input, runs forward pass and returns result detections.
 * For DetectionModel SSD, Faster R-CNN, YOLO topologies are supported.
 */
class CV_EXPORTS_W_SIMPLE DetectionModel : public Model
{
public:
    /**
     * @brief Create detection model from network represented in one of the supported formats.
     * An order of @p model and @p config arguments does not matter.
     * @param[in] model Binary file contains trained weights.
     * @param[in] config Text file contains network configuration.
     */
    CV_WRAP DetectionModel(CV_WRAP_FILE_PATH const String& model, CV_WRAP_FILE_PATH const String& config = "" );

    /**
     * @brief Create model from deep learning network.
     * @param[in] network Net object.
     */
    CV_WRAP DetectionModel(const Net& network);

    CV_DEPRECATED_EXTERNAL // avoid using in C++ code (need to fix bindings first)
    DetectionModel();

    /**
     * @brief nmsAcrossClasses defaults to false,
     * such that when non max suppression is used during the detect() function, it will do so per-class.
     * This function allows you to toggle this behavior.
     * @param[in] value The new value for nmsAcrossClasses
     */
    CV_WRAP DetectionModel& setNmsAcrossClasses(bool value);

    /**
     * @brief Getter for nmsAcrossClasses. This variable defaults to false,
     * such that when non max suppression is used during the detect() function, it will do so only per-class
     */
    CV_WRAP bool getNmsAcrossClasses();

    /** @brief Given the @p input frame, create input blob, run net and return result detections.
     *  @param[in] frame The input image.
     *  @param[out] classIds Class indexes in result detection.
     *  @param[out] confidences A set of corresponding confidences.
     *  @param[out] boxes A set of bounding boxes.
     *  @param[in] confThreshold A threshold used to filter boxes by confidences.
     *  @param[in] nmsThreshold A threshold used in non maximum suppression.
     */
    CV_WRAP void detect(InputArray frame, CV_OUT std::vector<int>& classIds,
                        CV_OUT std::vector<float>& confidences, CV_OUT std::vector<Rect>& boxes,
                        float confThreshold = 0.5f, float nmsThreshold = 0.0f);
};
| 1674 | |
| 1675 | |
/** @brief This class represents high-level API for text recognition networks.
 *
 * TextRecognitionModel allows to set params for preprocessing input image.
 * TextRecognitionModel creates net from file with trained weights and config,
 * sets preprocessing input, runs forward pass and returns the recognition result.
 * For TextRecognitionModel, CRNN-CTC is supported.
 */
class CV_EXPORTS_W_SIMPLE TextRecognitionModel : public Model
{
public:
    CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to "protected" (need to fix bindings first)
    TextRecognitionModel();

    /**
     * @brief Create Text Recognition model from deep learning network
     * Call setDecodeType() and setVocabulary() after constructor to initialize the decoding method
     * @param[in] network Net object
     */
    CV_WRAP TextRecognitionModel(const Net& network);

    /**
     * @brief Create text recognition model from network represented in one of the supported formats
     * Call setDecodeType() and setVocabulary() after constructor to initialize the decoding method
     * @param[in] model Binary file contains trained weights
     * @param[in] config Text file contains network configuration
     */
    CV_WRAP inline
    TextRecognitionModel(CV_WRAP_FILE_PATH const std::string& model, CV_WRAP_FILE_PATH const std::string& config = "" )
        : TextRecognitionModel(readNet(model, config)) { /* nothing */ }

    /**
     * @brief Set the decoding method of translating the network output into string
     * @param[in] decodeType The decoding method of translating the network output into string, currently supported type:
     *    - `"CTC-greedy"` greedy decoding for the output of CTC-based methods
     *    - `"CTC-prefix-beam-search"` Prefix beam search decoding for the output of CTC-based methods
     */
    CV_WRAP
    TextRecognitionModel& setDecodeType(const std::string& decodeType);

    /**
     * @brief Get the decoding method
     * @return the decoding method
     */
    CV_WRAP
    const std::string& getDecodeType() const;

    /**
     * @brief Set the decoding method options for `"CTC-prefix-beam-search"` decode usage
     * @param[in] beamSize Beam size for search
     * @param[in] vocPruneSize Parameter to optimize big vocabulary search,
     * only take top @p vocPruneSize tokens in each search step, @p vocPruneSize <= 0 stands for disable this prune.
     */
    CV_WRAP
    TextRecognitionModel& setDecodeOptsCTCPrefixBeamSearch(int beamSize, int vocPruneSize = 0);

    /**
     * @brief Set the vocabulary for recognition.
     * @param[in] vocabulary the associated vocabulary of the network.
     */
    CV_WRAP
    TextRecognitionModel& setVocabulary(const std::vector<std::string>& vocabulary);

    /**
     * @brief Get the vocabulary for recognition.
     * @return vocabulary the associated vocabulary
     */
    CV_WRAP
    const std::vector<std::string>& getVocabulary() const;

    /**
     * @brief Given the @p input frame, create input blob, run net and return recognition result
     * @param[in] frame The input image
     * @return The text recognition result
     */
    CV_WRAP
    std::string recognize(InputArray frame) const;

    /**
     * @brief Given the @p input frame, create input blob, run net and return recognition result
     * @param[in] frame The input image
     * @param[in] roiRects List of text detection regions of interest (cv::Rect, CV_32SC4). The ROIs are cropped and used as the network inputs
     * @param[out] results A set of text recognition results.
     */
    CV_WRAP
    void recognize(InputArray frame, InputArrayOfArrays roiRects, CV_OUT std::vector<std::string>& results) const;
};
| 1762 | |
| 1763 | |
/** @brief Base class for text detection networks
 */
class CV_EXPORTS_W_SIMPLE TextDetectionModel : public Model
{
protected:
    CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to "protected" (need to fix bindings first)
    TextDetectionModel();

public:

    /** @brief Performs detection
     *
     * Given the input @p frame, prepare network input, run network inference, post-process network output and return result detections.
     *
     * Each result is quadrangle's 4 points in this order:
     * - bottom-left
     * - top-left
     * - top-right
     * - bottom-right
     *
     * Use cv::getPerspectiveTransform function to retrieve image region without perspective transformations.
     *
     * @note If DL model doesn't support that kind of output then result may be derived from detectTextRectangles() output.
     *
     * @param[in] frame The input image
     * @param[out] detections array with detections' quadrangles (4 points per result)
     * @param[out] confidences array with detection confidences
     */
    CV_WRAP
    void detect(
            InputArray frame,
            CV_OUT std::vector< std::vector<Point> >& detections,
            CV_OUT std::vector<float>& confidences
    ) const;

    /** @overload */
    CV_WRAP
    void detect(
            InputArray frame,
            CV_OUT std::vector< std::vector<Point> >& detections
    ) const;

    /** @brief Performs detection
     *
     * Given the input @p frame, prepare network input, run network inference, post-process network output and return result detections.
     *
     * Each result is a rotated rectangle.
     *
     * @note Result may be inaccurate in case of strong perspective transformations.
     *
     * @param[in] frame the input image
     * @param[out] detections array with detections' RotatedRect results
     * @param[out] confidences array with detection confidences
     */
    CV_WRAP
    void detectTextRectangles(
            InputArray frame,
            CV_OUT std::vector<cv::RotatedRect>& detections,
            CV_OUT std::vector<float>& confidences
    ) const;

    /** @overload */
    CV_WRAP
    void detectTextRectangles(
            InputArray frame,
            CV_OUT std::vector<cv::RotatedRect>& detections
    ) const;
};
| 1832 | |
/** @brief This class represents high-level API for text detection DL networks compatible with EAST model.
 *
 * Configurable parameters:
 * - (float) confThreshold - used to filter boxes by confidences, default: 0.5f
 * - (float) nmsThreshold - used in non maximum suppression, default: 0.0f
 */
class CV_EXPORTS_W_SIMPLE TextDetectionModel_EAST : public TextDetectionModel
{
public:
    CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to "protected" (need to fix bindings first)
    TextDetectionModel_EAST();

    /**
     * @brief Create text detection algorithm from deep learning network
     * @param[in] network Net object
     */
    CV_WRAP TextDetectionModel_EAST(const Net& network);

    /**
     * @brief Create text detection model from network represented in one of the supported formats.
     * An order of @p model and @p config arguments does not matter.
     * @param[in] model Binary file contains trained weights.
     * @param[in] config Text file contains network configuration.
     */
    CV_WRAP inline
    TextDetectionModel_EAST(CV_WRAP_FILE_PATH const std::string& model, CV_WRAP_FILE_PATH const std::string& config = "" )
        : TextDetectionModel_EAST(readNet(model, config)) { /* nothing */ }

    /**
     * @brief Set the detection confidence threshold
     * @param[in] confThreshold A threshold used to filter boxes by confidences
     */
    CV_WRAP
    TextDetectionModel_EAST& setConfidenceThreshold(float confThreshold);

    /**
     * @brief Get the detection confidence threshold
     */
    CV_WRAP
    float getConfidenceThreshold() const;

    /**
     * @brief Set the detection NMS filter threshold
     * @param[in] nmsThreshold A threshold used in non maximum suppression
     */
    CV_WRAP
    TextDetectionModel_EAST& setNMSThreshold(float nmsThreshold);

    /**
     * @brief Get the detection NMS filter threshold
     */
    CV_WRAP
    float getNMSThreshold() const;
};
| 1887 | |
/** @brief This class represents high-level API for text detection DL networks compatible with DB model.
 *
 * Related publications: @cite liao2020real
 * Paper: https://arxiv.org/abs/1911.08947
 * For more information about the hyper-parameters setting, please refer to https://github.com/MhLiao/DB
 *
 * Configurable parameters:
 * - (float) binaryThreshold - The threshold of the binary map. It is usually set to 0.3.
 * - (float) polygonThreshold - The threshold of text polygons. It is usually set to 0.5, 0.6, and 0.7. Default is 0.5f
 * - (double) unclipRatio - The unclip ratio of the detected text region, which determines the output size. It is usually set to 2.0.
 * - (int) maxCandidates - The max number of the output results.
 */
class CV_EXPORTS_W_SIMPLE TextDetectionModel_DB : public TextDetectionModel
{
public:
    CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to "protected" (need to fix bindings first)
    TextDetectionModel_DB();

    /**
     * @brief Create text detection algorithm from deep learning network.
     * @param[in] network Net object.
     */
    CV_WRAP TextDetectionModel_DB(const Net& network);

    /**
     * @brief Create text detection model from network represented in one of the supported formats.
     * An order of @p model and @p config arguments does not matter.
     * @param[in] model Binary file contains trained weights.
     * @param[in] config Text file contains network configuration.
     */
    CV_WRAP inline
    TextDetectionModel_DB(CV_WRAP_FILE_PATH const std::string& model, CV_WRAP_FILE_PATH const std::string& config = "" )
        : TextDetectionModel_DB(readNet(model, config)) { /* nothing */ }

    /** @brief Set the threshold of the binary map (see the class description; usually 0.3). */
    CV_WRAP TextDetectionModel_DB& setBinaryThreshold(float binaryThreshold);
    /** @brief Get the threshold of the binary map. */
    CV_WRAP float getBinaryThreshold() const;

    /** @brief Set the threshold of text polygons (see the class description; usually 0.5-0.7). */
    CV_WRAP TextDetectionModel_DB& setPolygonThreshold(float polygonThreshold);
    /** @brief Get the threshold of text polygons. */
    CV_WRAP float getPolygonThreshold() const;

    /** @brief Set the unclip ratio of the detected text region, which determines the output size (usually 2.0). */
    CV_WRAP TextDetectionModel_DB& setUnclipRatio(double unclipRatio);
    /** @brief Get the unclip ratio of the detected text region. */
    CV_WRAP double getUnclipRatio() const;

    /** @brief Set the max number of the output results. */
    CV_WRAP TextDetectionModel_DB& setMaxCandidates(int maxCandidates);
    /** @brief Get the max number of the output results. */
    CV_WRAP int getMaxCandidates() const;
};
| 1934 | |
| 1935 | //! @} |
| 1936 | CV__DNN_INLINE_NS_END |
| 1937 | } |
| 1938 | } |
| 1939 | |
| 1940 | #include <opencv2/dnn/layer.hpp> |
| 1941 | #include <opencv2/dnn/dnn.inl.hpp> |
| 1942 | |
| 1943 | /// @deprecated Include this header directly from application. Automatic inclusion will be removed |
| 1944 | #include <opencv2/dnn/utils/inference_engine.hpp> |
| 1945 | |
| 1946 | #endif /* OPENCV_DNN_DNN_HPP */ |
| 1947 | |