// This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ELTWISE_HPP #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ELTWISE_HPP #include "../../op_cuda.hpp" #include "../csl/stream.hpp" #include "../csl/tensor.hpp" #include "../csl/tensor_ops.hpp" #include "../kernels/eltwise_ops.hpp" #include #include #include #include namespace cv { namespace dnn { namespace cuda4dnn { enum class EltwiseOpType { MAX, SUM, PRODUCT, DIV }; template class EltwiseOp final : public CUDABackendNode { public: using wrapper_type = GetCUDABackendWrapperType; template EltwiseOp(csl::Stream stream_, EltwiseOpType op_, std::vector coeffs_) : stream(std::move(stream_)), op{ op_ }, coeffs(std::begin(coeffs_), std::end(coeffs_)) { } void forward( const std::vector>& inputs, const std::vector>& outputs, csl::Workspace& workspace) override { CV_Assert(inputs.size() >= 2); CV_Assert(outputs.size() == 1); CV_Assert(coeffs.size() == 0 || op == EltwiseOpType::SUM); CV_Assert(coeffs.size() == 0 || inputs.size() == coeffs.size()); auto output_wrapper = outputs[0].dynamicCast(); auto output = output_wrapper->getSpan(); if (inputs.size() == 2) { auto input_wrapper_x = inputs[0].dynamicCast(); auto input_x = input_wrapper_x->getView(); auto input_wrapper_y = inputs[1].dynamicCast(); auto input_y = input_wrapper_y->getView(); switch (op) { case EltwiseOpType::MAX: kernels::eltwise_max_2(stream, output, input_x, input_y); break; case EltwiseOpType::PRODUCT: kernels::eltwise_prod_2(stream, output, input_x, input_y); break; case EltwiseOpType::DIV: kernels::eltwise_div_2(stream, output, input_x, input_y); break; case EltwiseOpType::SUM: if (coeffs.empty() || (coeffs[0] == 1 && coeffs[1] == 1)) kernels::eltwise_sum_2(stream, output, input_x, input_y); else kernels::eltwise_sum_coeff_2(stream, output, coeffs[0], input_x, coeffs[1], input_y); break; } } else { auto input_wrapper_0 = inputs[0].dynamicCast(); auto input_0 = input_wrapper_0->getView(); /* we first make a copy and then apply EltwiseOp cumulatively */ csl::tensor_ops::copy(stream, output, input_0); for (int i = 1; i < inputs.size(); i++) { auto input_wrapper = inputs[i].dynamicCast(); auto input = input_wrapper->getView(); switch (op) { case EltwiseOpType::MAX: kernels::eltwise_max_2(stream, output, output, input); break; case EltwiseOpType::PRODUCT: kernels::eltwise_prod_2(stream, output, output, input); break; case EltwiseOpType::DIV: kernels::eltwise_div_2(stream, output, output, input); break; case EltwiseOpType::SUM: if (coeffs.empty() || coeffs[i] == 1) kernels::eltwise_sum_2(stream, output, output, input); else { /* if this is the first op, we must scale output too */ auto coeff_x = (i == 1) ? coeffs[0] : static_cast(1.0); kernels::eltwise_sum_coeff_2(stream, output, coeff_x, output, coeffs[i], input); } break; } } } } private: csl::Stream stream; EltwiseOpType op; std::vector coeffs; }; }}} /* namespace cv::dnn::cuda4dnn */ #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ELTWISE_HPP */