1 | // This file is part of OpenCV project. |
2 | // It is subject to the license terms in the LICENSE file found in the top-level directory |
3 | // of this distribution and at http://opencv.org/license.html. |
4 | |
5 | #include "precomp.hpp" |
6 | |
7 | #ifdef HAVE_CUDA |
8 | #include "op_cuda.hpp" |
9 | #include "cuda4dnn/init.hpp" |
10 | #include "net_impl.hpp" |
11 | |
12 | namespace cv { namespace dnn { |
13 | CV__DNN_INLINE_NS_BEGIN |
14 | |
15 | |
16 | void Net::Impl::initCUDABackend(const std::vector<LayerPin>& blobsToKeep_) |
17 | { |
18 | CV_Assert(preferableBackend == DNN_BACKEND_CUDA); |
19 | |
20 | if (!cudaInfo) /* we need to check only once */ |
21 | cuda4dnn::checkVersions(); |
22 | |
23 | if (cuda4dnn::getDeviceCount() <= 0) |
24 | CV_Error(Error::StsError, "No CUDA capable device found." ); |
25 | |
26 | if (cuda4dnn::getDevice() < 0) |
27 | CV_Error(Error::StsError, "No CUDA capable device selected." ); |
28 | |
29 | if (!cuda4dnn::isDeviceCompatible()) |
30 | CV_Error(Error::GpuNotSupported, "OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration." ); |
31 | |
32 | if (preferableTarget == DNN_TARGET_CUDA_FP16 && !cuda4dnn::doesDeviceSupportFP16()) |
33 | { |
34 | CV_LOG_WARNING(NULL, "The selected CUDA device does not support FP16 target; switching to FP32 target." ); |
35 | preferableTarget = DNN_TARGET_CUDA; |
36 | } |
37 | |
38 | if (!cudaInfo) |
39 | { |
40 | cuda4dnn::csl::CSLContext context; |
41 | context.stream = cuda4dnn::csl::Stream(true); |
42 | context.cublas_handle = cuda4dnn::csl::cublas::Handle(context.stream); |
43 | context.cudnn_handle = cuda4dnn::csl::cudnn::Handle(context.stream); |
44 | |
45 | auto d2h_stream = cuda4dnn::csl::Stream(true); // stream for background D2H data transfers |
46 | cudaInfo = std::unique_ptr<CudaInfo_t>(new CudaInfo_t(std::move(context), std::move(d2h_stream))); |
47 | } |
48 | |
49 | cudaInfo->workspace = cuda4dnn::csl::Workspace(); // release workspace memory if any |
50 | |
51 | for (auto& layer : layers) |
52 | { |
53 | auto& ld = layer.second; |
54 | if (ld.id == 0) |
55 | { |
56 | for (auto& wrapper : ld.inputBlobsWrappers) |
57 | { |
58 | auto cudaWrapper = wrapper.dynamicCast<CUDABackendWrapper>(); |
59 | cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream); |
60 | } |
61 | } |
62 | |
63 | for (auto& wrapper : ld.outputBlobsWrappers) |
64 | { |
65 | auto cudaWrapper = wrapper.dynamicCast<CUDABackendWrapper>(); |
66 | cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream); |
67 | } |
68 | } |
69 | |
70 | for (auto& layer : layers) |
71 | { |
72 | auto& ld = layer.second; |
73 | auto& layerInstance = ld.layerInstance; |
74 | |
75 | if (!layerInstance->supportBackend(DNN_BACKEND_CUDA)) |
76 | { |
77 | std::ostringstream os; |
78 | os << "CUDA backend will fallback to the CPU implementation for the layer \"" << ld.name |
79 | << "\" of type " << ld.type << '\n'; |
80 | CV_LOG_INFO(NULL, os.str().c_str()); |
81 | continue; |
82 | } |
83 | |
84 | /* we make a copy so that `initCUDA` doesn't modify `cudaInfo->context` */ |
85 | auto context = cudaInfo->context; |
86 | auto node = layerInstance->initCUDA(&context, ld.inputBlobsWrappers, ld.outputBlobsWrappers); |
87 | ld.backendNodes[DNN_BACKEND_CUDA] = node; |
88 | |
89 | if(!node.empty()) |
90 | { |
91 | auto cudaNode = node.dynamicCast<CUDABackendNode>(); |
92 | cudaInfo->workspace.require(cudaNode->get_workspace_memory_in_bytes()); |
93 | } |
94 | } |
95 | |
96 | if (blobsToKeep_.size() > 1) |
97 | { |
98 | for (const auto& pin : blobsToKeep_) |
99 | { |
100 | LayerData& ld = layers[pin.lid]; |
101 | ld.cudaD2HBackgroundTransfers.push_back(pin.oid); |
102 | } |
103 | } |
104 | } |
105 | |
106 | |
107 | CV__DNN_INLINE_NS_END |
108 | }} // namespace cv::dnn |
109 | #endif // HAVE_CUDA |
110 | |