// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"

#ifdef HAVE_CUDA
#include "op_cuda.hpp"
#include "cuda4dnn/init.hpp"
#include "net_impl.hpp"

namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN

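/* Initializes the CUDA backend for a network.
 *
 * In outline: verify that a usable CUDA device is present, build the CSL context
 * (execution stream plus cuBLAS/cuDNN handles) and a background D2H stream on first
 * use, attach those streams to every blob wrapper, ask each layer that supports the
 * CUDA backend to create its backend node, and record which blobs should be copied
 * back to the host in the background.
 */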
void Net::Impl::initCUDABackend(const std::vector<LayerPin>& blobsToKeep_)
{
    CV_Assert(preferableBackend == DNN_BACKEND_CUDA);

    if (!cudaInfo) /* we need to check only once */
        cuda4dnn::checkVersions();

    if (cuda4dnn::getDeviceCount() <= 0)
        CV_Error(Error::StsError, "No CUDA capable device found.");

    if (cuda4dnn::getDevice() < 0)
        CV_Error(Error::StsError, "No CUDA capable device selected.");

    if (!cuda4dnn::isDeviceCompatible())
        CV_Error(Error::GpuNotSupported, "OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration.");

    if (preferableTarget == DNN_TARGET_CUDA_FP16 && !cuda4dnn::doesDeviceSupportFP16())
    {
        CV_LOG_WARNING(NULL, "The selected CUDA device does not support FP16 target; switching to FP32 target.");
        preferableTarget = DNN_TARGET_CUDA;
    }

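    /* The per-network CSL context is created only once: a dedicated CUDA stream for
     * execution together with cuBLAS and cuDNN handles bound to that stream. A second
     * stream is reserved exclusively for background device-to-host (D2H) transfers. */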
    if (!cudaInfo)
    {
        cuda4dnn::csl::CSLContext context;
        context.stream = cuda4dnn::csl::Stream(true);
        context.cublas_handle = cuda4dnn::csl::cublas::Handle(context.stream);
        context.cudnn_handle = cuda4dnn::csl::cudnn::Handle(context.stream);

        auto d2h_stream = cuda4dnn::csl::Stream(true); // stream for background D2H data transfers
        cudaInfo = std::unique_ptr<CudaInfo_t>(new CudaInfo_t(std::move(context), std::move(d2h_stream)));
    }

    cudaInfo->workspace = cuda4dnn::csl::Workspace(); // release workspace memory if any

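    /* Attach the execution stream and the D2H stream to every blob wrapper so that
     * uploads, kernel launches and downloads are ordered consistently. Every layer's
     * output wrappers are covered by the inner loop below; layer 0 (the network input
     * layer) additionally needs its input wrappers handled, as they are not produced
     * by any other layer. */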
    for (auto& layer : layers)
    {
        auto& ld = layer.second;
        if (ld.id == 0)
        {
            for (auto& wrapper : ld.inputBlobsWrappers)
            {
                auto cudaWrapper = wrapper.dynamicCast<CUDABackendWrapper>();
                cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream);
            }
        }

        for (auto& wrapper : ld.outputBlobsWrappers)
        {
            auto cudaWrapper = wrapper.dynamicCast<CUDABackendWrapper>();
            cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream);
        }
    }

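    /* Create a CUDA backend node for every layer that supports the CUDA backend; layers
     * that do not are skipped here and fall back to the CPU implementation at run time.
     * Each node reports its scratch-memory requirement, which the shared workspace
     * records (presumably keeping the maximum, so a single buffer can serve all layers,
     * which execute sequentially on one stream). */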
    for (auto& layer : layers)
    {
        auto& ld = layer.second;
        auto& layerInstance = ld.layerInstance;

        if (!layerInstance->supportBackend(DNN_BACKEND_CUDA))
        {
            std::ostringstream os;
            os << "CUDA backend will fall back to the CPU implementation for the layer \"" << ld.name
               << "\" of type " << ld.type << '\n';
            CV_LOG_INFO(NULL, os.str().c_str());
            continue;
        }

        /* we make a copy so that `initCUDA` doesn't modify `cudaInfo->context` */
        auto context = cudaInfo->context;
        auto node = layerInstance->initCUDA(&context, ld.inputBlobsWrappers, ld.outputBlobsWrappers);
        ld.backendNodes[DNN_BACKEND_CUDA] = node;

        if (!node.empty())
        {
            auto cudaNode = node.dynamicCast<CUDABackendNode>();
            cudaInfo->workspace.require(cudaNode->get_workspace_memory_in_bytes());
        }
    }

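    /* Blobs that must remain available to the caller are copied back to the host
     * asynchronously on the dedicated D2H stream while the rest of the network keeps
     * executing. This is set up only when more than one blob is kept, presumably
     * because a single requested output is simply downloaded when it is read. */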
    if (blobsToKeep_.size() > 1)
    {
        for (const auto& pin : blobsToKeep_)
        {
            LayerData& ld = layers[pin.lid];
            ld.cudaD2HBackgroundTransfers.push_back(pin.oid);
        }
    }
}

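/* For reference, a minimal sketch of how a user would route a model through this CUDA
 * backend via the public API (the model path and input are placeholders; the backend is
 * initialized lazily, typically on the first forward pass):
 *
 *     cv::dnn::Net net = cv::dnn::readNet("model.onnx");
 *     net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
 *     net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);  // or DNN_TARGET_CUDA_FP16
 *     net.setInput(cv::dnn::blobFromImage(image));
 *     cv::Mat out = net.forward();
 */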

CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
#endif // HAVE_CUDA