diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 26d7a9b069..44b16f7800 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -738,6 +738,40 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams ¶ms); }; + class CV_EXPORTS CeluLayer : public ActivationLayer + { + public: + float alpha; + + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS HardSigmoidLayer : public ActivationLayer + { + public: + float alpha; + float beta; + + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS SeluLayer : public ActivationLayer + { + public: + float alpha; + float gamma; + + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS ThresholdedReluLayer : public ActivationLayer + { + public: + float alpha; + + static Ptr create(const LayerParams ¶ms); + }; + class CV_EXPORTS ActivationLayerInt8 : public ActivationLayer { public: diff --git a/modules/dnn/src/cuda/activations.cu b/modules/dnn/src/cuda/activations.cu index 3d99a03ae3..f5dafcea7f 100644 --- a/modules/dnn/src/cuda/activations.cu +++ b/modules/dnn/src/cuda/activations.cu @@ -233,6 +233,26 @@ void tan(const Stream& stream, Span output, View input) { generic_op>(stream, output, input); } +template +void celu(const Stream& stream, Span output, View input, T alpha) { + generic_op>(stream, output, input, {alpha}); +} + +template +void hardsigmoid(const Stream& stream, Span output, View input, T alpha, T beta) { + generic_op>(stream, output, input, {alpha, beta}); +} + +template +void selu(const Stream& stream, Span output, View input, T alpha, T gamma) { + generic_op>(stream, output, input, {alpha, gamma}); +} + +template +void thresholdedrelu(const Stream& stream, Span output, View input, T alpha) { + generic_op>(stream, output, input, {alpha}); +} + template void abs(const Stream& stream, Span output, View input) { generic_op>(stream, output, input); @@ -286,6 +306,10 @@ template void sinh<__half>(const Stream&, Span<__half>, View<__half>); template void softplus<__half>(const Stream&, Span<__half>, View<__half>); template void softsign<__half>(const Stream&, Span<__half>, View<__half>); template void tan<__half>(const Stream&, Span<__half>, View<__half>); +template void celu<__half>(const Stream&, Span<__half>, View<__half>, __half); +template void hardsigmoid<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); +template void selu<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); +template void thresholdedrelu<__half>(const Stream&, Span<__half>, View<__half>, __half); template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half); template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); #endif @@ -321,6 +345,10 @@ template void sinh(const Stream&, Span, View); template void softplus(const Stream&, Span, View); template void softsign(const Stream&, Span, View); template void tan(const Stream&, Span, View); +template void celu(const Stream&, Span, View, float); +template void hardsigmoid(const Stream&, Span, View, float, float); +template void selu(const Stream&, Span, View, float, float); +template void thresholdedrelu(const Stream&, Span, View, float); template void power(const Stream&, Span, View, float, float, float); template void exp(const Stream&, Span, View, float, float); diff --git a/modules/dnn/src/cuda/functors.hpp b/modules/dnn/src/cuda/functors.hpp index c3d1669344..640c7c8ad6 100644 --- a/modules/dnn/src/cuda/functors.hpp +++ b/modules/dnn/src/cuda/functors.hpp @@ -528,6 +528,84 @@ struct TanFunctor { } }; +template +struct CeluFunctor { + struct Params { + CUDA4DNN_HOST_DEVICE Params() : alpha(1) { } + CUDA4DNN_HOST_DEVICE Params(T alpha_) : alpha(alpha_) { } + T alpha; + }; + + CUDA4DNN_DEVICE CeluFunctor() : CeluFunctor(Params{}) { } + CUDA4DNN_DEVICE CeluFunctor(const Params& params) : alpha{params.alpha} { } + + CUDA4DNN_DEVICE T operator()(T value) { + using csl::device::min; + using csl::device::max; + using csl::device::expm1; + return max(T(0), value) + min(T(0), alpha * expm1(value / alpha)); + } + + T alpha; +}; + +template +struct HardSigmoidFunctor { + struct Params { + CUDA4DNN_HOST_DEVICE Params() : alpha(0.2), beta(0.5) { } + CUDA4DNN_HOST_DEVICE Params(T alpha_, T beta_) : alpha(alpha_), beta(beta_) { } + T alpha, beta; + }; + + CUDA4DNN_DEVICE HardSigmoidFunctor() : HardSigmoidFunctor(Params{}) { } + CUDA4DNN_DEVICE HardSigmoidFunctor(const Params& params): alpha{params.alpha}, beta{params.beta} { } + + CUDA4DNN_DEVICE T operator()(T value) { + using csl::device::clamp; + return clamp(alpha * value + beta, T(0), T(1)); + } + + T alpha, beta; +}; + +template +struct SeluFunctor { + struct Params { + CUDA4DNN_HOST_DEVICE Params() : alpha(1.6732632423543772848170429916717), + gamma(1.0507009873554804934193349852946) { } + CUDA4DNN_HOST_DEVICE Params(T alpha_, T gamma_) : alpha(alpha_), gamma(gamma_) { } + T alpha, gamma; + }; + + CUDA4DNN_DEVICE SeluFunctor() : SeluFunctor(Params{}) { } + CUDA4DNN_DEVICE SeluFunctor(const Params& params): alpha{params.alpha}, gamma{params.gamma} { } + + CUDA4DNN_DEVICE T operator()(T value) { + using csl::device::expm1; + return gamma * (value > T(0) ? value : alpha * expm1(value)); + } + + T alpha, gamma; +}; + +template +struct ThresholdedReluFunctor { + struct Params { + CUDA4DNN_HOST_DEVICE Params() : alpha(1) { } + CUDA4DNN_HOST_DEVICE Params(T alpha_) : alpha(alpha_) { } + T alpha; + }; + + CUDA4DNN_DEVICE ThresholdedReluFunctor() : ThresholdedReluFunctor(Params{}) { } + CUDA4DNN_DEVICE ThresholdedReluFunctor(const Params& params) : alpha{params.alpha} { } + + CUDA4DNN_DEVICE T operator()(T value) { + return (value > alpha) ? value : T(0); + } + + T alpha; +}; + template struct PowerFunctor { struct Params { diff --git a/modules/dnn/src/cuda4dnn/kernels/activations.hpp b/modules/dnn/src/cuda4dnn/kernels/activations.hpp index 854bc8ac0c..ef1f6da3e6 100644 --- a/modules/dnn/src/cuda4dnn/kernels/activations.hpp +++ b/modules/dnn/src/cuda4dnn/kernels/activations.hpp @@ -105,6 +105,18 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { template void tan(const csl::Stream& stream, csl::Span output, csl::View input); + template + void celu(const csl::Stream& stream, csl::Span output, csl::View input, T alpha); + + template + void hardsigmoid(const csl::Stream& stream, csl::Span output, csl::View input, T alpha, T beta); + + template + void selu(const csl::Stream& stream, csl::Span output, csl::View input, T alpha, T gamma); + + template + void thresholdedrelu(const csl::Stream& stream, csl::Span output, csl::View input, T alpha); + template void power(const csl::Stream& stream, csl::Span output, csl::View input, T exp, T scale, T shift); diff --git a/modules/dnn/src/cuda4dnn/primitives/activation.hpp b/modules/dnn/src/cuda4dnn/primitives/activation.hpp index 4691996d4e..39ebf513a7 100644 --- a/modules/dnn/src/cuda4dnn/primitives/activation.hpp +++ b/modules/dnn/src/cuda4dnn/primitives/activation.hpp @@ -490,6 +490,68 @@ namespace cv { namespace dnn { namespace cuda4dnn { csl::Stream stream; }; + template + class CeluOp final : public BaseOp { + public: + CeluOp(csl::Stream stream_, T alpha_) : stream(std::move(stream_)), alpha{ alpha_ } { } + + void calculate(csl::TensorSpan output, csl::TensorView input) const + { + kernels::celu(stream, output, input, alpha); + } + + private: + csl::Stream stream; + const T alpha; + }; + + template + class HardSigmoidOp final : public BaseOp { + public: + HardSigmoidOp(csl::Stream stream_, T alpha_, T beta_) + : stream(std::move(stream_)), alpha{ alpha_ }, beta{ beta_ } { } + + void calculate(csl::TensorSpan output, csl::TensorView input) const + { + kernels::hardsigmoid(stream, output, input, alpha, beta); + } + + private: + csl::Stream stream; + const T alpha, beta; + }; + + template + class SeluOp final : public BaseOp { + public: + SeluOp(csl::Stream stream_, T alpha_, T gamma_) + : stream(std::move(stream_)), alpha{ alpha_ }, gamma{ gamma_ } { } + + void calculate(csl::TensorSpan output, csl::TensorView input) const + { + kernels::selu(stream, output, input, alpha, gamma); + } + + private: + csl::Stream stream; + const T alpha, gamma; + }; + + template + class ThresholdedReluOp final : public BaseOp { + public: + ThresholdedReluOp(csl::Stream stream_, T alpha_) : stream(std::move(stream_)), alpha{ alpha_ } { } + + void calculate(csl::TensorSpan output, csl::TensorView input) const + { + kernels::thresholdedrelu(stream, output, input, alpha); + } + + private: + csl::Stream stream; + const T alpha; + }; + template class PowerOp final : public BaseOp { public: diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 89a91e17ae..55ed1e5d17 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -132,6 +132,10 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(Softplus, SoftplusLayer); CV_DNN_REGISTER_LAYER_CLASS(Softsign, SoftsignLayer); CV_DNN_REGISTER_LAYER_CLASS(Tan, TanLayer); + CV_DNN_REGISTER_LAYER_CLASS(Celu, CeluLayer); + CV_DNN_REGISTER_LAYER_CLASS(HardSigmoid, HardSigmoidLayer); + CV_DNN_REGISTER_LAYER_CLASS(Selu, SeluLayer); + CV_DNN_REGISTER_LAYER_CLASS(ThresholdedRelu,ThresholdedReluLayer); CV_DNN_REGISTER_LAYER_CLASS(BatchNorm, BatchNormLayer); CV_DNN_REGISTER_LAYER_CLASS(MaxUnpool, MaxUnpoolLayer); CV_DNN_REGISTER_LAYER_CLASS(Dropout, BlankLayer); diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 772dfca602..bfabef9d68 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -71,6 +71,7 @@ namespace dnn using std::abs; using std::exp; +using std::expm1; using std::tanh; using std::pow; using std::ceil; @@ -728,6 +729,20 @@ struct BaseDefaultFunctor : public BaseFunctor return true; } +#ifdef HAVE_CUDA + Ptr initCUDA(int target, csl::Stream stream) + { + CV_Error(Error::StsNotImplemented, ""); + } +#endif + +#ifdef HAVE_HALIDE + void attachHalide(const Halide::Expr& input, Halide::Func& top) + { + CV_Error(Error::StsNotImplemented, ""); + } +#endif // HAVE_HALIDE + #ifdef HAVE_DNN_IE_NN_BUILDER_2019 InferenceEngine::Builder::Layer initInfEngineBuilderAPI() { @@ -746,8 +761,6 @@ struct BaseDefaultFunctor : public BaseFunctor ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input) { CV_Error(Error::StsNotImplemented, ""); - ml::Operand operand; - return operand; } #endif @@ -759,20 +772,6 @@ struct BaseDefaultFunctor : public BaseFunctor } #endif // HAVE_VULKAN -#ifdef HAVE_CUDA - Ptr initCUDA(int target, csl::Stream stream) - { - CV_Error(Error::StsNotImplemented, ""); - } -#endif - -#ifdef HAVE_HALIDE - void attachHalide(const Halide::Expr& input, Halide::Func& top) - { - CV_Error(Error::StsNotImplemented, ""); - } -#endif // HAVE_HALIDE - private: static const char* const ocl_kernel_name; }; @@ -823,15 +822,6 @@ struct TanHFunctor : public BaseDefaultFunctor } #endif // HAVE_DNN_NGRAPH -#ifdef HAVE_WEBNN - ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input) - { - CV_Error(Error::StsNotImplemented, ""); - ml::Operand operand; - return operand; - } -#endif - int64 getFLOPSPerElement() const { return 1; } }; @@ -935,15 +925,6 @@ struct MishFunctor : public BaseDefaultFunctor } #endif // HAVE_DNN_NGRAPH -#ifdef HAVE_WEBNN - ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input) - { - CV_Error(Error::StsNotImplemented, ""); - ml::Operand operand; - return operand; - } -#endif - int64 getFLOPSPerElement() const { return 3; } }; @@ -996,15 +977,6 @@ struct SigmoidFunctor : public BaseDefaultFunctor } #endif // HAVE_DNN_NGRAPH -#ifdef HAVE_WEBNN - ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input) - { - CV_Error(Error::StsNotImplemented, ""); - ml::Operand operand; - return operand; - } -#endif - int64 getFLOPSPerElement() const { return 3; } }; @@ -1123,15 +1095,6 @@ struct AbsValFunctor : public BaseDefaultFunctor } #endif // HAVE_DNN_NGRAPH -#ifdef HAVE_WEBNN - ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input) - { - CV_Error(Error::StsNotImplemented, ""); - ml::Operand operand; - return operand; - } -#endif - int64 getFLOPSPerElement() const { return 1; } }; @@ -1261,15 +1224,6 @@ struct LogFunctor : public BaseDefaultFunctor return log(x); } -#ifdef HAVE_WEBNN - ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input) - { - CV_Error(Error::StsNotImplemented, ""); - ml::Operand operand; - return operand; - } -#endif - #ifdef HAVE_CUDA Ptr initCUDA(int target, csl::Stream stream) { @@ -1367,15 +1321,6 @@ struct SqrtFunctor : public BaseDefaultFunctor } #endif // HAVE_DNN_NGRAPH -#ifdef HAVE_WEBNN - ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input) - { - CV_Error(Error::StsNotImplemented, ""); - ml::Operand operand; - return operand; - } -#endif - int64 getFLOPSPerElement() const { return 1; } }; @@ -1822,6 +1767,156 @@ struct TanFunctor : public BaseDefaultFunctor template<> const char* const BaseDefaultFunctor::ocl_kernel_name = "TanForward"; +struct CeluFunctor : public BaseDefaultFunctor +{ + typedef CeluLayer Layer; + + float alpha; + + explicit CeluFunctor(float alpha_ = 1.f) : alpha(alpha_) {} + + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA; + } + + inline float calculate(float x) const + { + return max(0.f, x) + min(0.f, alpha * expm1(x / alpha)); + } + + inline void setKernelParams(ocl::Kernel& kernel) const + { + kernel.set(3, alpha); + } + +#ifdef HAVE_CUDA + Ptr initCUDA(int target, csl::Stream stream) + { + return make_cuda_node(target, stream, alpha); + } +#endif + + int64 getFLOPSPerElement() const { return 1; } +}; + +template<> +const char* const BaseDefaultFunctor::ocl_kernel_name = "CeluForward"; + +struct HardSigmoidFunctor : public BaseDefaultFunctor +{ + typedef HardSigmoidLayer Layer; + + float alpha; + float beta; + + explicit HardSigmoidFunctor(float alpha_ = 0.2f, float beta_ = 0.5f) : alpha(alpha_), beta(beta_) {} + + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA; + } + + inline float calculate(float x) const + { + return max(0.f, min(1.f, alpha * x + beta)); + } + + inline void setKernelParams(ocl::Kernel& kernel) const + { + kernel.set(3, alpha); + kernel.set(4, beta); + } + +#ifdef HAVE_CUDA + Ptr initCUDA(int target, csl::Stream stream) + { + return make_cuda_node(target, stream, alpha, beta); + } +#endif + + int64 getFLOPSPerElement() const { return 1; } +}; + +template<> +const char* const BaseDefaultFunctor::ocl_kernel_name = "HardSigmoidForward"; + +struct SeluFunctor : public BaseDefaultFunctor +{ + typedef SeluLayer Layer; + + float alpha; + float gamma; + + explicit SeluFunctor(float alpha_ = 1.67326319217681884765625f, + float gamma_ = 1.05070102214813232421875f) : alpha(alpha_), gamma(gamma_) {} + + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA; + } + + inline float calculate(float x) const + { + return gamma * (x > 0.f ? x : alpha * expm1(x)); + } + + inline void setKernelParams(ocl::Kernel& kernel) const + { + kernel.set(3, alpha); + kernel.set(4, gamma); + } + +#ifdef HAVE_CUDA + Ptr initCUDA(int target, csl::Stream stream) + { + return make_cuda_node(target, stream, alpha, gamma); + } +#endif + + int64 getFLOPSPerElement() const { return 1; } +}; + +template<> +const char* const BaseDefaultFunctor::ocl_kernel_name = "SeluForward"; + +struct ThresholdedReluFunctor : public BaseDefaultFunctor +{ + typedef ThresholdedReluLayer Layer; + + float alpha; + + explicit ThresholdedReluFunctor(float alpha_ = 1.f) : alpha(alpha_) {} + + + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA; + } + + inline float calculate(float x) const + { + return x > alpha ? x : 0.f; + } + + inline void setKernelParams(ocl::Kernel& kernel) const + { + kernel.set(3, alpha); + } + +#ifdef HAVE_CUDA + Ptr initCUDA(int target, csl::Stream stream) + { + return make_cuda_node(target, stream, alpha); + } +#endif + + int64 getFLOPSPerElement() const { return 1; } +}; + +template<> +const char* const BaseDefaultFunctor::ocl_kernel_name = "ThresholdedReluForward"; + struct PowerFunctor : public BaseFunctor { typedef PowerLayer Layer; @@ -2074,15 +2169,6 @@ struct ExpFunctor : public BaseDefaultFunctor } #endif // HAVE_DNN_NGRAPH -#ifdef HAVE_WEBNN - ml::Operand initWebnnAPI(const ml::GraphBuilder& builder, const ml::Operand& input) - { - CV_Error(Error::StsNotImplemented, ""); - ml::Operand operand; - return operand; - } -#endif - int64 getFLOPSPerElement() const { return 3; } }; @@ -2489,6 +2575,50 @@ Ptr TanLayer::create(const LayerParams& params) return l; } +Ptr CeluLayer::create(const LayerParams& params) +{ + float alpha = params.get("alpha", 1.f); + Ptr l(new ElementWiseLayer(CeluFunctor(alpha))); + l->setParamsFrom(params); + l->alpha = alpha; + + return l; +} + +Ptr HardSigmoidLayer::create(const LayerParams& params) +{ + float alpha = params.get("alpha", 0.2f); + float beta = params.get("beta", 0.5f); + Ptr l(new ElementWiseLayer(HardSigmoidFunctor(alpha, beta))); + l->setParamsFrom(params); + l->alpha = alpha; + l->beta = beta; + + return l; +} + +Ptr SeluLayer::create(const LayerParams& params) +{ + float alpha = params.get("alpha", 1.67326319217681884765625f); + float gamma = params.get("gamma", 1.05070102214813232421875f); + Ptr l(new ElementWiseLayer(SeluFunctor(alpha, gamma))); + l->setParamsFrom(params); + l->alpha = alpha; + l->gamma = gamma; + + return l; +} + +Ptr ThresholdedReluLayer::create(const LayerParams& params) +{ + float alpha = params.get("alpha", 1.f); + Ptr l(new ElementWiseLayer(ThresholdedReluFunctor(alpha))); + l->setParamsFrom(params); + l->alpha = alpha; + + return l; +} + Ptr PowerLayer::create(const LayerParams& params) { float power = params.get("power", 1.0f); diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp index 81a5df1a28..80fe0b173e 100644 --- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp +++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp @@ -242,6 +242,70 @@ public: } }; +class CeluSubgraph : public Subgraph +{ +public: + CeluSubgraph() : alpha(1.f) + { + int input = addNodeToMatch(""); + int div = addNodeToMatch("Div", input, addNodeToMatch("")); + int elu = addNodeToMatch("Elu", div); + addNodeToMatch("Mul", addNodeToMatch(""), elu); + setFusedNode("Celu", input); + } + + static float extractAlpha(const Ptr& net, int node_id, int input_id) + { + const Ptr node = net->getNode(node_id); + int const_id = getInputNodeId(net, node, input_id); + Ptr alpha_ptr = net->getNode(const_id); + opencv_onnx::NodeProto* alpha_node = alpha_ptr.dynamicCast()->node; + opencv_onnx::TensorProto alpha_proto = alpha_node->attribute(0).t(); + Mat alpha_mat = getMatFromTensor(alpha_proto); + return *alpha_mat.ptr(); + } + + virtual bool match(const Ptr& net, int nodeId, + std::vector& matchedNodesIds, + std::vector& targetNodesIds) CV_OVERRIDE + { + if (Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds)) + { + float alpha_div = extractAlpha(net, matchedNodesIds[0], 1); + float alpha_mul = extractAlpha(net, matchedNodesIds[2], 0); + float alpha_elu = 1.f; + + Ptr elu_ptr = net->getNode(matchedNodesIds[1]); + opencv_onnx::NodeProto* elu_node = elu_ptr.dynamicCast()->node; + + for (int i = 0; i < elu_node->attribute_size(); i++) + { + opencv_onnx::AttributeProto attr = elu_node->attribute(i); + if (attr.name() != "alpha") + continue; + alpha_elu = attr.f(); + } + + alpha = alpha_div; + return alpha_elu == 1.f && alpha_div == alpha_mul; + } + return false; + } + + virtual void finalize(const Ptr&, + const Ptr& fusedNode, + std::vector >&) CV_OVERRIDE + { + opencv_onnx::NodeProto* node = fusedNode.dynamicCast()->node; + opencv_onnx::AttributeProto* alpha_attr = node->add_attribute(); + alpha_attr->set_name("alpha"); + alpha_attr->set_f(alpha); + } + +protected: + float alpha; +}; + class NormalizeSubgraphBase : public Subgraph { public: @@ -662,6 +726,7 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net) subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); + subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); diff --git a/modules/dnn/src/opencl/activations.cl b/modules/dnn/src/opencl/activations.cl index 02ed9345c3..040ee20d8a 100644 --- a/modules/dnn/src/opencl/activations.cl +++ b/modules/dnn/src/opencl/activations.cl @@ -272,3 +272,37 @@ __kernel void TanForward(const int n, __global T* in, __global T* out) { if(index < n) out[index] = tan(in[index]); } + +__kernel void CeluForward(const int n, __global T* in, __global T* out, + const KERNEL_ARG_DTYPE alpha) +{ + int index = get_global_id(0); + if(index < n) + out[index] = max(0.f, in[index]) + min(0.f, alpha * expm1(in[index] / alpha)); +} + +__kernel void HardSigmoidForward(const int n, __global T* in, __global T* out, + const KERNEL_ARG_DTYPE alpha, + const KERNEL_ARG_DTYPE beta) +{ + int index = get_global_id(0); + if(index < n) + out[index] = max(0.f, min(1.f, alpha * in[index] + beta)); +} + +__kernel void SeluForward(const int n, __global T* in, __global T* out, + const KERNEL_ARG_DTYPE alpha, + const KERNEL_ARG_DTYPE gamma) +{ + int index = get_global_id(0); + if(index < n) + out[index] = gamma * (in[index] > 0.f ? in[index] : alpha * expm1(in[index])); +} + +__kernel void ThresholdedReluForward(const int n, __global T* in, __global T* out, + const KERNEL_ARG_DTYPE alpha) +{ + int index = get_global_id(0); + if(index < n) + out[index] = (in[index] > alpha ? in[index] : 0.f); +} diff --git a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp index a69ace0d14..e5d0ead9da 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp @@ -57,7 +57,6 @@ "test_castlike_FLOAT_to_FLOAT16_expanded", "test_castlike_FLOAT_to_STRING", "test_castlike_STRING_to_FLOAT", -"test_celu", "test_clip", "test_clip_default_inbounds", "test_clip_default_int8_inbounds", @@ -173,9 +172,6 @@ "test_hardmax_example", "test_hardmax_negative_axis", "test_hardmax_one_hot", -"test_hardsigmoid", -"test_hardsigmoid_default", -"test_hardsigmoid_example", "test_identity_opt", "test_identity_sequence", "test_if", @@ -524,9 +520,6 @@ "test_sce_sum_expanded", "test_sce_sum_log_prob", "test_sce_sum_log_prob_expanded", -"test_selu", -"test_selu_default", -"test_selu_example", "test_sequence_insert_at_back", "test_sequence_insert_at_front", "test_shape", @@ -579,9 +572,6 @@ "test_tfidfvectorizer_tf_onlybigrams_levelempty", "test_tfidfvectorizer_tf_onlybigrams_skip5", "test_tfidfvectorizer_tf_uniandbigrams_skip5", -"test_thresholdedrelu", -"test_thresholdedrelu_default", -"test_thresholdedrelu_example", "test_tile", "test_tile_precomputed", "test_top_k",