diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 39aaa1edb4..08cec7ae64 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -499,6 +499,14 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams &params); }; + class CV_EXPORTS ExpLayer : public ActivationLayer + { + public: + float base, scale, shift; + + static Ptr create(const LayerParams &params); + }; + /* Layers used in semantic segmentation */ class CV_EXPORTS CropLayer : public Layer diff --git a/modules/dnn/src/cuda/activations.cu b/modules/dnn/src/cuda/activations.cu index 6a991baea2..599d58852e 100644 --- a/modules/dnn/src/cuda/activations.cu +++ b/modules/dnn/src/cuda/activations.cu @@ -145,6 +145,11 @@ void power(const Stream& stream, Span output, View input, T exp, T scale, generic_op>(stream, output, input, {exp, scale, shift}); } +template +void exp(const Stream& stream, Span output, View input, T normScale, T normShift) { + generic_op>(stream, output, input, {normScale, normShift}); +} + #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530) template void relu<__half>(const Stream&, Span<__half>, View<__half>, __half); template void clipped_relu<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); @@ -156,6 +161,7 @@ template void elu<__half>(const Stream&, Span<__half>, View<__half>); template void abs<__half>(const Stream& stream, Span<__half> output, View<__half> input); template void bnll<__half>(const Stream&, Span<__half>, View<__half>); template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half); +template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); #endif @@ -169,6 +175,7 @@ template void elu(const Stream&, Span, View); template void abs(const Stream& stream, Span output, View input); template void bnll(const Stream&, Span, View); template void power(const Stream&, Span, View, float, 
float, float); +template void exp(const Stream&, Span, View, float, float); template static void launch_vectorized_axiswise_relu(const Stream& stream, Span output, View input, std::size_t inner_size, View slope) { diff --git a/modules/dnn/src/cuda/functors.hpp b/modules/dnn/src/cuda/functors.hpp index 0435cb294f..1c29de0426 100644 --- a/modules/dnn/src/cuda/functors.hpp +++ b/modules/dnn/src/cuda/functors.hpp @@ -228,6 +228,25 @@ struct PowerFunctor { T exp, scale, shift; }; +template +struct ExpFunctor { + struct Params { + CUDA4DNN_HOST_DEVICE Params() : normScale(1), normShift(0) { } + CUDA4DNN_HOST_DEVICE Params(T nScale_, T nShift_) : normScale(nScale_), normShift(nShift_) { } + T normScale, normShift; + }; + + CUDA4DNN_DEVICE ExpFunctor() : ExpFunctor(Params{}) { } + CUDA4DNN_DEVICE ExpFunctor(const Params& params) : normScale{params.normScale}, normShift{params.normShift} { } + + CUDA4DNN_DEVICE T operator()(T value) { + using csl::device::fast_exp; + return fast_exp(normShift + normScale * value); + } + + T normScale, normShift; +}; + template struct MaxFunctor { struct Params { @@ -297,4 +316,4 @@ struct DivFunctor { }}}} /* namespace cv::dnn::cuda4dnn::kernels */ -#endif /* OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP */ \ No newline at end of file +#endif /* OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP */ diff --git a/modules/dnn/src/cuda4dnn/kernels/activations.hpp b/modules/dnn/src/cuda4dnn/kernels/activations.hpp index 46f697fce3..0a7c9878fb 100644 --- a/modules/dnn/src/cuda4dnn/kernels/activations.hpp +++ b/modules/dnn/src/cuda4dnn/kernels/activations.hpp @@ -45,6 +45,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { template void power(const csl::Stream& stream, csl::Span output, csl::View input, T exp, T scale, T shift); + template + void exp(const csl::Stream& stream, csl::Span output, csl::View input, T normScale, T normShift); + }}}} /* namespace cv::dnn::cuda4dnn::kernels */ #endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ACTIVATIONS_HPP */ diff 
--git a/modules/dnn/src/cuda4dnn/primitives/activation.hpp b/modules/dnn/src/cuda4dnn/primitives/activation.hpp index fce996a89e..84b95927a3 100644 --- a/modules/dnn/src/cuda4dnn/primitives/activation.hpp +++ b/modules/dnn/src/cuda4dnn/primitives/activation.hpp @@ -341,6 +341,36 @@ namespace cv { namespace dnn { namespace cuda4dnn { const T exp, scale, shift; }; + template + class ExpOp final : public CUDABackendNode { + public: + using wrapper_type = GetCUDABackendWrapperType; + + ExpOp(csl::Stream stream_, T nScale_, T nShift_) + : stream(std::move(stream_)), normScale{ nScale_ }, normShift{ nShift_ } { } + + void forward( + const std::vector>& inputs, + const std::vector>& outputs, + csl::Workspace& workspace) override + { + for (int i = 0; i < inputs.size(); i++) + { + auto input_wrapper = inputs[i].dynamicCast(); + auto input = input_wrapper->getView(); + + auto output_wrapper = outputs[i].dynamicCast(); + auto output = output_wrapper->getSpan(); + + kernels::exp(stream, output, input, normScale, normShift); + } + } + + private: + csl::Stream stream; + const T normScale, normShift; + }; + }}} /* namespace cv::dnn::cuda4dnn */ #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ACTIVATION_HPP */ diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 570a6ff665..698168817f 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -110,6 +110,7 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(BNLL, BNLLLayer); CV_DNN_REGISTER_LAYER_CLASS(AbsVal, AbsLayer); CV_DNN_REGISTER_LAYER_CLASS(Power, PowerLayer); + CV_DNN_REGISTER_LAYER_CLASS(Exp, ExpLayer); CV_DNN_REGISTER_LAYER_CLASS(BatchNorm, BatchNormLayer); CV_DNN_REGISTER_LAYER_CLASS(MaxUnpool, MaxUnpoolLayer); CV_DNN_REGISTER_LAYER_CLASS(Dropout, BlankLayer); diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index ed87a3e2fc..23d4c50521 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ 
b/modules/dnn/src/layers/elementwise_layers.cpp @@ -1400,6 +1400,125 @@ struct PowerFunctor : public BaseFunctor int64 getFLOPSPerElement() const { return power == 1 ? 2 : 10; } }; +struct ExpFunctor : public BaseFunctor +{ + typedef ExpLayer Layer; + float base, scale, shift; + float normScale, normShift; + + ExpFunctor(float base_ = -1.f, float scale_ = 1.f, float shift_ = 0.f) + : base(base_), scale(scale_), shift(shift_) + { + CV_Check(base, base == -1.f || base > 0.f, "Unsupported 'base' value"); + } + + bool supportBackend(int backendId, int targetId) + { + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || + backendId == DNN_BACKEND_HALIDE || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; + } + + void finalize() + { + // For base > 0 : + // y = base^(scale * input + shift) + // ln(y) = ln(base)*(scale * input + shift) + // y = exp((ln(base)*scale) * input + (ln(base)*shift)) + // y = exp(normalized_scale * input + normalized_shift) + + float ln_base = (base == -1.f) ? 
1.f : log(base); + normScale = scale * ln_base; + normShift = shift * ln_base; + } + + void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const + { + float a = normScale, b = normShift; + for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize ) + { + for( int i = 0; i < len; i++ ) + { + float x = srcptr[i]; + dstptr[i] = exp(a*x + b); + } + } + } + +#ifdef HAVE_OPENCL + bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) + { + std::vector inputs; + std::vector outputs; + + inps.getUMatVector(inputs); + outs.getUMatVector(outputs); + String buildopt = oclGetTMacro(inputs[0]); + + for (size_t i = 0; i < inputs.size(); i++) + { + UMat& src = inputs[i]; + UMat& dst = outputs[i]; + + ocl::Kernel kernel("ExpForward", ocl::dnn::activations_oclsrc, buildopt); + kernel.set(0, (int)src.total()); + kernel.set(1, ocl::KernelArg::PtrReadOnly(src)); + kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst)); + kernel.set(3, (float)normScale); + kernel.set(4, (float)normShift); + + size_t gSize = src.total(); + CV_Assert(kernel.run(1, &gSize, NULL, false)); + } + return true; + } +#endif + +#ifdef HAVE_CUDA + Ptr initCUDA(int target, csl::Stream stream) + { + return make_cuda_node(target, stream, normScale, normShift); + } +#endif + +#ifdef HAVE_HALIDE + void attachHalide(const Halide::Expr& input, Halide::Func& top) + { + Halide::Var x("x"), y("y"), c("c"), n("n"); + top(x, y, c, n) = exp(normScale * input + normShift); + } +#endif // HAVE_HALIDE + +#ifdef HAVE_DNN_IE_NN_BUILDER_2019 + InferenceEngine::Builder::Layer initInfEngineBuilderAPI() + { + CV_Error(Error::StsNotImplemented, ""); + } +#endif // HAVE_DNN_IE_NN_BUILDER_2019 + +#ifdef HAVE_DNN_NGRAPH + std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + { + auto scale_node = std::make_shared(ngraph::element::f32, + ngraph::Shape{1}, &normScale); + auto shift_node = std::make_shared(ngraph::element::f32, + 
ngraph::Shape{1}, &normShift); + auto mul = std::make_shared(scale_node, node, ngraph::op::AutoBroadcastType::NUMPY); + auto scale_shift = std::make_shared(mul, shift_node, ngraph::op::AutoBroadcastType::NUMPY); + return std::make_shared(scale_shift); + } +#endif // HAVE_DNN_NGRAPH + +#ifdef HAVE_VULKAN + std::shared_ptr initVkCom() + { + // TODO: add vkcom implementation + return std::shared_ptr(); + } +#endif // HAVE_VULKAN + + int64 getFLOPSPerElement() const { return 3; } +}; + struct ChannelsPReLUFunctor : public BaseFunctor { typedef ChannelsPReLULayer Layer; @@ -1634,6 +1753,20 @@ Ptr PowerLayer::create(const LayerParams& params) return l; } +Ptr ExpLayer::create(const LayerParams& params) +{ + float base = params.get("base", -1.0f); + float scale = params.get("scale", 1.0f); + float shift = params.get("shift", 0.0f); + Ptr l(new ElementWiseLayer(ExpFunctor(base, scale, shift))); + l->setParamsFrom(params); + l->base = base; + l->scale = scale; + l->shift = shift; + + return l; +} + Ptr ChannelsPReLULayer::create(const LayerParams& params) { CV_Assert(params.blobs.size() == 1); diff --git a/modules/dnn/src/opencl/activations.cl b/modules/dnn/src/opencl/activations.cl index b900e6add6..68f0dd7268 100644 --- a/modules/dnn/src/opencl/activations.cl +++ b/modules/dnn/src/opencl/activations.cl @@ -140,3 +140,14 @@ __kernel void ELUForward(const int n, __global const T* in, __global T* out) out[index] = (src >= 0.f) ? 
src : exp(src) - 1; } } + +__kernel void ExpForward(const int n, __global const T* in, __global T* out, + const KERNEL_ARG_DTYPE normScale, + const KERNEL_ARG_DTYPE normShift) +{ + int index = get_global_id(0); + if (index < n) + { + out[index] = exp(normShift + normScale * in[index]); + } +} diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 203c632b02..a6f1f96159 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -2425,7 +2425,7 @@ void TFImporter::parseNode(const tensorflow::NodeDef& layer_) connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); } else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" || - type == "Relu" || type == "Elu" || + type == "Relu" || type == "Elu" || type == "Exp" || type == "Identity" || type == "Relu6") { CV_CheckGT(num_inputs, 0, ""); diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index 7e6d7f87d2..4f9c35a933 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -632,6 +632,31 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Power, Combine( dnnBackendsAndTargetsWithHalide() )); +typedef TestWithParam > > Exp; +TEST_P(Exp, Accuracy) +{ + float base = get<0>(GetParam())[0]; + float scale = get<0>(GetParam())[1]; + float shift = get<0>(GetParam())[2]; + Backend backendId = get<0>(get<1>(GetParam())); + Target targetId = get<1>(get<1>(GetParam())); + + LayerParams lp; + lp.set("base", base); + lp.set("scale", scale); + lp.set("shift", shift); + lp.type = "Exp"; + lp.name = "testLayer"; + testInPlaceActivation(lp, backendId, targetId); +} + +INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Exp, Combine( +/*base, scale, shift*/ Values(Vec3f(0.9f, -1.0f, 1.1f), Vec3f(0.9f, 1.1f, -1.0f), + Vec3f(-1.0f, 0.9f, 1.1f), Vec3f(-1.0f, 1.1f, 0.9f), + Vec3f(1.1f, 0.9f, -1.0f), Vec3f(1.1f, -1.0f, 0.9f)), + 
dnnBackendsAndTargetsWithHalide() +)); + TEST_P(Test_Halide_layers, ChannelsPReLU) { LayerParams lp; diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 61537e0e01..fd48a3814c 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -2152,6 +2152,12 @@ public: randu(scales, -1.0f, 1.0f); activationParams.blobs.push_back(scales); } + else if (activationParams.type == "Exp") + { + activationParams.set("base", -1.0f); + activationParams.set("scale", 0.3f); + activationParams.set("shift", 0.6f); + } } static void makeDefaultTestEltwiseLayer(LayerParams& eltwiseParams, const std::string& op, bool withCoefficients) @@ -2223,7 +2229,7 @@ public: static testing::internal::ParamGenerator activationLayersList() { // TODO: automate list generation - return Values("ReLU", "ReLU6", "ChannelsPReLU", "TanH", "Swish", "Mish", "Sigmoid", "ELU", "AbsVal", "BNLL", "Power"); + return Values("ReLU", "ReLU6", "ChannelsPReLU", "TanH", "Swish", "Mish", "Sigmoid", "ELU", "AbsVal", "BNLL", "Power", "Exp"); } static testing::internal::ParamGenerator > dnnBackendsAndTargetsForFusionTests() diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 676b0e8537..bfb87fa1d1 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -329,6 +329,13 @@ TEST_P(Test_ONNX_layers, Power) testONNXModels("pow2", npy, 0, 0, false, false); } +TEST_P(Test_ONNX_layers, Exp) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + testONNXModels("exp"); +} + TEST_P(Test_ONNX_layers, Concatenation) { if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)