op_cuda.cpp source code [opencv/modules/dnn/src/op_cuda.cpp]

1	// This file is part of OpenCV project.
2	// It is subject to the license terms in the LICENSE file found in the top-level directory
3	// of this distribution and at http://opencv.org/license.html.
4
5	#include "precomp.hpp"
6
7	#ifdef HAVE_CUDA
8	#include "op_cuda.hpp"
9	#include "cuda4dnn/init.hpp"
10	#include "net_impl.hpp"
11
12	namespace cv { namespace dnn {
13	CV__DNN_INLINE_NS_BEGIN
14
15
16	void Net::Impl::initCUDABackend(const std::vector<LayerPin>& blobsToKeep_)
17	{
18	CV_Assert(preferableBackend == DNN_BACKEND_CUDA);
19
20	if (!cudaInfo) / we need to check only once /
21	cuda4dnn::checkVersions();
22
23	if (cuda4dnn::getDeviceCount() <= `0`)
24	CV_Error(Error::StsError, "No CUDA capable device found.");
25
26	if (cuda4dnn::getDevice() < `0`)
27	CV_Error(Error::StsError, "No CUDA capable device selected.");
28
29	if (!cuda4dnn::isDeviceCompatible())
30	CV_Error(Error::GpuNotSupported, "OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration.");
31
32	if (preferableTarget == DNN_TARGET_CUDA_FP16 && !cuda4dnn::doesDeviceSupportFP16())
33	{
34	CV_LOG_WARNING(NULL, "The selected CUDA device does not support FP16 target; switching to FP32 target.");
35	preferableTarget = DNN_TARGET_CUDA;
36	}
37
38	if (!cudaInfo)
39	{
40	cuda4dnn::csl::CSLContext context;
41	context.stream = cuda4dnn::csl::Stream(true);
42	context.cublas_handle = cuda4dnn::csl::cublas::Handle(context.stream);
43	context.cudnn_handle = cuda4dnn::csl::cudnn::Handle(context.stream);
44
45	auto d2h_stream = cuda4dnn::csl::Stream(true); // stream for background D2H data transfers
46	cudaInfo = std::unique_ptr<CudaInfo_t>(new CudaInfo_t(std::move(context), std::move(d2h_stream)));
47	}
48
49	cudaInfo->workspace = cuda4dnn::csl::Workspace(); // release workspace memory if any
50
51	for (auto& layer : layers)
52	{
53	auto& ld = layer.second;
54	if (ld.id == `0`)
55	{
56	for (auto& wrapper : ld.inputBlobsWrappers)
57	{
58	auto cudaWrapper = wrapper.dynamicCast<CUDABackendWrapper>();
59	cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream);
60	}
61	}
62
63	for (auto& wrapper : ld.outputBlobsWrappers)
64	{
65	auto cudaWrapper = wrapper.dynamicCast<CUDABackendWrapper>();
66	cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream);
67	}
68	}
69
70	for (auto& layer : layers)
71	{
72	auto& ld = layer.second;
73	auto& layerInstance = ld.layerInstance;
74
75	if (!layerInstance->supportBackend(DNN_BACKEND_CUDA))
76	{
77	std::ostringstream os;
78	os << "CUDA backend will fallback to the CPU implementation for the layer \"" << ld.name
79	<< "\" of type " << ld.type << `'\n'`;
80	CV_LOG_INFO(NULL, os.str().c_str());
81	continue;
82	}
83
84	/ we make a copy so that `initCUDA` doesn't modify `cudaInfo->context` /
85	auto context = cudaInfo->context;
86	auto node = layerInstance->initCUDA(&context, ld.inputBlobsWrappers, ld.outputBlobsWrappers);
87	ld.backendNodes[DNN_BACKEND_CUDA] = node;
88
89	if(!node.empty())
90	{
91	auto cudaNode = node.dynamicCast<CUDABackendNode>();
92	cudaInfo->workspace.require(cudaNode->get_workspace_memory_in_bytes());
93	}
94	}
95
96	if (blobsToKeep_.size() > `1`)
97	{
98	for (const auto& pin : blobsToKeep_)
99	{
100	LayerData& ld = layers[pin.lid];
101	ld.cudaD2HBackgroundTransfers.push_back(pin.oid);
102	}
103	}
104	}
105
106
107	CV__DNN_INLINE_NS_END
108	}} // namespace cv::dnn
109	#endif // HAVE_CUDA
110

Provided by KDAB

Improve your Profiling and Debugging skills

Find out more

source code of opencv/modules/dnn/src/op_cuda.cpp