Merge pull request #24647 from fengyuentau:cuda_sub
dnn cuda: support Sub #24647

Related: https://github.com/opencv/opencv/issues/24606#issuecomment-1837390257

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV.
- [x] The PR is proposed to the proper branch.
- [x] There is a reference to the original bug report and related work.
- [x] There is an accuracy test, a performance test, and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake.
Commit: a2edf4d929 (parent: f5ec92e4ca)
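For context, here is a minimal sketch of how the new path would be exercised from the public dnn API, assuming an OpenCV build with CUDA enabled and a hypothetical two-input ONNX model `sub.onnx` whose graph contains a single Sub node; with this patch the layer can stay on the CUDA backend instead of being rejected in `supportBackend()`.

```cpp
#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>

int main() {
    // Hypothetical model: two inputs "A" and "B" feeding one Sub node.
    cv::dnn::Net net = cv::dnn::readNetFromONNX("sub.onnx");

    // Request the CUDA backend; after this patch Sub is listed as supported.
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);

    cv::Mat a(1, 4, CV_32F, cv::Scalar(3.0f));
    cv::Mat b(1, 4, CV_32F, cv::Scalar(1.0f));
    net.setInput(a, "A");
    net.setInput(b, "B");

    cv::Mat out = net.forward();  // expected: all elements equal to 2.0f
    return 0;
}
```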
```diff
@@ -319,7 +319,13 @@ void eltwise_div_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x,
     eltwise_op<T, DivFunctor<T>>(stream, output, x, y);
 }
 
+template <class T>
+void eltwise_sub_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y) {
+    eltwise_op<T, SubFunctor<T>>(stream, output, x, y);
+}
+
 #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
+template void eltwise_sub_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
 template void eltwise_div_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
 template void eltwise_prod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
 template void eltwise_sum_coeff_2(const Stream&, TensorSpan<__half>, __half, TensorView<__half>, __half, TensorView<__half>);
@@ -327,6 +333,7 @@ void eltwise_div_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x,
 template void eltwise_max_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
 template void eltwise_min_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
 #endif
+template void eltwise_sub_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
 template void eltwise_div_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
 template void eltwise_prod_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
 template void eltwise_sum_coeff_2(const Stream&, TensorSpan<float>, float, TensorView<float>, float, TensorView<float>);
```
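The wrapper itself is intentionally thin: `eltwise_sub_2` only names the functor and forwards to the generic `eltwise_op` launcher, so broadcasting and launch logic are shared with the existing add/mul/div paths, and the `__half` instantiations stay behind the existing `__CUDA_ARCH__ >= 530` guard because native FP16 arithmetic requires compute capability 5.3 or newer. As a rough host-side illustration of that shape (not the actual CUDA kernel, which walks the tensors on the GPU), the dispatch boils down to applying a binary functor element-wise:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Simplified stand-ins for TensorView/TensorSpan: contiguous element ranges.
template <class T, class Functor>
void eltwise_op_sketch(std::vector<T>& output, const std::vector<T>& x, const std::vector<T>& y) {
    Functor op;  // default-constructed, mirroring SubFunctor()
    for (std::size_t i = 0; i < output.size(); ++i)
        output[i] = op(x[i], y[i]);
}

template <class T>
struct SubFunctorSketch {
    T operator()(T a, T b) const { return a - b; }
};

// The per-op wrapper only selects the functor, like eltwise_sub_2 above.
template <class T>
void eltwise_sub_2_sketch(std::vector<T>& output, const std::vector<T>& x, const std::vector<T>& y) {
    eltwise_op_sketch<T, SubFunctorSketch<T>>(output, x, y);
}

int main() {
    std::vector<float> x{5.f, 7.f, 9.f}, y{1.f, 2.f, 3.f}, out(3);
    eltwise_sub_2_sketch(out, x, y);
    for (float v : out) std::cout << v << ' ';  // prints: 4 5 6
    std::cout << '\n';
}
```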
```diff
@@ -741,6 +741,18 @@ struct DivFunctor {
     CUDA4DNN_DEVICE T operator()(T x, T y) { return x / y; }
 };
 
+template <class T>
+struct SubFunctor {
+    struct Params {
+        CUDA4DNN_HOST_DEVICE Params() { }
+    };
+
+    CUDA4DNN_DEVICE SubFunctor() { }
+    CUDA4DNN_DEVICE SubFunctor(const Params& params) { }
+
+    CUDA4DNN_DEVICE T operator()(T x, T y) { return x - y; }
+};
+
 template <class T>
 struct SignFunctor {
     struct Params {
```
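`SubFunctor` follows the same convention as the surrounding functors: even though subtraction takes no parameters, it still exposes an empty `Params` struct and a `Params`-taking constructor, so generic kernel code can construct any functor the same way. A minimal sketch of why that uniform interface helps (the names below are illustrative, not the actual cuda4dnn templates):

```cpp
#include <iostream>

// A parameterized functor: configured through its Params.
struct ScaledAddSketch {
    struct Params {
        float scale = 1.0f;
    };
    ScaledAddSketch() { }
    ScaledAddSketch(const Params& p) : scale(p.scale) { }
    float operator()(float x, float y) const { return x + scale * y; }
    float scale = 1.0f;
};

// A parameter-free functor keeps the same shape with an empty Params.
struct SubSketch {
    struct Params { };
    SubSketch() { }
    SubSketch(const Params&) { }
    float operator()(float x, float y) const { return x - y; }
};

// Generic code constructs either functor from its Params without special cases.
template <class Functor>
float apply_once(float x, float y, const typename Functor::Params& params = {}) {
    Functor op(params);
    return op(x, y);
}

int main() {
    std::cout << apply_once<SubSketch>(5.f, 2.f) << '\n';                // 3
    std::cout << apply_once<ScaledAddSketch>(5.f, 2.f, {0.5f}) << '\n';  // 6
}
```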
```diff
@@ -30,6 +30,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
     template <class T>
     void eltwise_div_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);
 
+    template <class T>
+    void eltwise_sub_2(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> x, csl::TensorView<T> y);
+
 }}}} /* namespace cv::dnn::cuda4dnn::kernels */
 
 #endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ELTWISE_OPS_HPP */
```
```diff
@@ -27,6 +27,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
         PRODUCT,
         DIV,
         MIN,
+        SUB,
     };
 
     class EltwiseOpBase : public CUDABackendNode {
@@ -88,6 +89,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
                         else
                             kernels::eltwise_sum_coeff_2<T>(stream, output, coeffs[0], input_x, coeffs[1], input_y);
                         break;
+                    case EltwiseOpType::SUB: kernels::eltwise_sub_2<T>(stream, output, input_x, input_y); break;
                     }
                 }
                 else
@@ -119,6 +121,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
                             kernels::eltwise_sum_coeff_2<T>(stream, output, coeff_x, output, coeffs[i], input);
                         }
                         break;
+                    case EltwiseOpType::SUB: kernels::eltwise_sub_2<T>(stream, output, output, input); break;
                     }
                 }
             }
```
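In the forward path, the first pair of inputs produces `output = x - y`, and every further input is folded into the accumulator, which is what the second added line (`eltwise_sub_2<T>(stream, output, output, input)`) does. An n-input Sub therefore behaves as a left fold; a scalar sketch of the same folding order (illustrative only, the real code operates on whole tensors on the CUDA stream):

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Left-fold subtraction over n inputs, mirroring the two-stage dispatch:
// output = inputs[0] - inputs[1]; then output = output - inputs[i] for i >= 2.
float sub_fold(const std::vector<float>& inputs) {
    float output = inputs[0] - inputs[1];
    for (std::size_t i = 2; i < inputs.size(); ++i)
        output = output - inputs[i];
    return output;
}

int main() {
    std::cout << sub_fold({10.f, 3.f, 2.f}) << '\n';  // (10 - 3) - 2 = 5
}
```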
```diff
@@ -114,9 +114,11 @@ public:
                     op == OPERATION::GREATER_EQUAL ||
                     op == OPERATION::LESS_EQUAL
             );
-        if (op == OPERATION::MAX || op == OPERATION::MIN || op == OPERATION::SUM ||
-            op == OPERATION::PROD || op == OPERATION::DIV || op == OPERATION::ADD)
-            return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
+        if (backendId == DNN_BACKEND_CUDA) {
+            return op == OPERATION::MAX || op == OPERATION::MIN || op == OPERATION::SUM ||
+                   op == OPERATION::PROD || op == OPERATION::DIV || op == OPERATION::ADD ||
+                   op == OPERATION::SUB;
+        }
         return backendId == DNN_BACKEND_OPENCV;
     }
 
@@ -828,6 +830,9 @@ public:
             case OPERATION::ADD:
                 op_ = cuda4dnn::EltwiseOpType::SUM;
                 break;
+            case OPERATION::SUB:
+                op_ = cuda4dnn::EltwiseOpType::SUB;
+                break;
             default: return Ptr<BackendNode>(); // return empty cuda_node if the EltwiseOpType is unsupported type.
             };
 
```