Mirror of https://github.com/opencv/opencv.git
Merge pull request #24056 from dkurt:eltwise_prelu
PReLU with element-wise scales #24056

### Pull Request Readiness Checklist

resolves https://github.com/opencv/opencv/issues/24051

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent ab6bffc6f8
commit 677a28fd2a
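For context: channel-wise PReLU (OpenCV's existing `ChannelsPReLU` path) learns one negative slope per channel, while the element-wise variant added by this PR carries one slope per input element. A minimal standalone sketch of the two semantics (plain C++ with illustrative names, not OpenCV code):

```cpp
#include <cstddef>
#include <vector>

// Channel-wise PReLU: one learned slope per channel (slope.size() == channels).
std::vector<float> preluChannelwise(const std::vector<float>& x, const std::vector<float>& slope,
                                    std::size_t channels, std::size_t planeSize)
{
    std::vector<float> y(x.size());
    for (std::size_t c = 0; c < channels; ++c)
        for (std::size_t i = 0; i < planeSize; ++i)
        {
            const float v = x[c * planeSize + i];
            y[c * planeSize + i] = v >= 0.f ? v : slope[c] * v;
        }
    return y;
}

// Element-wise PReLU: one learned slope per input element (slope.size() == x.size()).
std::vector<float> preluElementwise(const std::vector<float>& x, const std::vector<float>& slope)
{
    std::vector<float> y(x.size());
    for (std::size_t i = 0; i < x.size(); ++i)
        y[i] = x[i] >= 0.f ? x[i] : slope[i] * x[i];
    return y;
}
```

When all slopes happen to be equal, the layer degenerates to a single-slope (leaky) ReLU, which is exactly the shortcut taken in `ChannelsPReLULayer::create` further down in this diff.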
```diff
@@ -138,7 +138,7 @@ public:
             {
                 const float* srcptr = src_->ptr<float>(i) + stripeStart;
                 float* dstptr = dst_->ptr<float>(i) + stripeStart;
-                func_->apply(srcptr, dstptr, (int)(stripeEnd - stripeStart), planeSize, 0, outCn);
+                func_->apply(srcptr, dstptr, stripeStart, (int)(stripeEnd - stripeStart), planeSize, 0, outCn);
             }
         }
     };
@@ -268,7 +268,7 @@ public:
 
     void forwardSlice(const float* src, float* dst, int len, size_t planeSize, int cn0, int cn1) const CV_OVERRIDE
     {
-        func.apply(src, dst, len, planeSize, cn0, cn1);
+        func.apply(src, dst, -1, len, planeSize, cn0, cn1);
     }
 
 #ifdef HAVE_CUDA
```
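The two hunks above thread a new `stripeStart` argument through the functor interface: the parallel body forwards the absolute offset of the stripe it processes, while `forwardSlice` passes `-1` because it has no stripe context. Per-channel functors simply ignore the value (see the `CV_UNUSED(stripeStart)` lines below); the new element-wise `PReLUFunctor` needs it to locate the matching slopes inside its scale blob. A hypothetical helper (not part of the patch) spelling out that offset arithmetic:

```cpp
#include <cstddef>

// For CHW data split into stripes, the slope matching element i of the current stripe
// in channel cn lives at scale[cn * planeSize + stripeStart + i].
inline const float* slopeForStripe(const float* scale, int cn, std::size_t planeSize, int stripeStart)
{
    return scale + cn * planeSize + stripeStart;
}
```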
```diff
@@ -355,8 +355,9 @@ struct ReLUFunctor : public BaseFunctor
                backendId == DNN_BACKEND_CANN;
     }
 
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
     {
+        CV_UNUSED(stripeStart);
         float s = slope;
         for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
         {
@@ -559,8 +560,9 @@ struct ReLU6Functor : public BaseFunctor
                backendId == DNN_BACKEND_CANN;
     }
 
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
     {
+        CV_UNUSED(stripeStart);
         for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
         {
             int i = 0;
@@ -704,8 +706,9 @@ struct ReLU6Functor : public BaseFunctor
 template <class T>
 struct BaseDefaultFunctor : public BaseFunctor
 {
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
     {
+        CV_UNUSED(stripeStart);
         for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
         {
             for( int i = 0; i < len; i++ )
@@ -2226,8 +2229,9 @@ struct PowerFunctor : public BaseFunctor
         shift = originShift;
     }
 
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
    {
+        CV_UNUSED(stripeStart);
         float a = scale, b = shift, p = power;
         if( p == 1.f )
         {
```
```diff
@@ -2452,6 +2456,7 @@ struct ChannelsPReLUFunctor : public BaseFunctor
     Mat scale;
 #ifdef HAVE_OPENCL
     UMat scale_umat;
+    std::string oclKernelName = "ChannelsPReLUForward";
 #endif
 
     explicit ChannelsPReLUFunctor(const Mat& scale_=Mat()) : scale(scale_)
@@ -2470,8 +2475,9 @@ struct ChannelsPReLUFunctor : public BaseFunctor
                backendId == DNN_BACKEND_CANN;
     }
 
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
     {
+        CV_UNUSED(stripeStart);
         CV_Assert(scale.isContinuous() && scale.type() == CV_32F);
 
         const float* scaleptr = scale.ptr<float>();
@@ -2525,7 +2531,7 @@ struct ChannelsPReLUFunctor : public BaseFunctor
             UMat& src = inputs[i];
             UMat& dst = outputs[i];
 
-            ocl::Kernel kernel("PReLUForward", ocl::dnn::activations_oclsrc, buildopt);
+            ocl::Kernel kernel(oclKernelName.c_str(), ocl::dnn::activations_oclsrc, buildopt);
             kernel.set(0, (int)src.total());
             kernel.set(1, (int)src.size[1]);
             kernel.set(2, (int)total(shape(src), 2));
```
```diff
@@ -2605,6 +2611,75 @@ struct ChannelsPReLUFunctor : public BaseFunctor
     int64 getFLOPSPerElement() const { return 1; }
 };
 
+struct PReLUFunctor : public ChannelsPReLUFunctor
+{
+    explicit PReLUFunctor(const Mat& scale_=Mat()) : ChannelsPReLUFunctor(scale_)
+    {
+#ifdef HAVE_OPENCL
+        oclKernelName = "PReLUForward";
+#endif
+    }
+
+    bool supportBackend(int backendId, int)
+    {
+        return backendId == DNN_BACKEND_OPENCV ||
+               backendId == DNN_BACKEND_CANN ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
+    }
+
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
+    {
+        CV_UNUSED(stripeStart);
+        CV_Assert(scale.isContinuous() && scale.type() == CV_32F);
+
+        if (stripeStart < 0)
+            CV_Error(Error::StsNotImplemented, "PReLUFunctor requires stripe offset parameter");
+
+        const float* scaleptr = scale.ptr<float>() + cn0 * planeSize + stripeStart;
+        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize, scaleptr += planeSize )
+        {
+            int i = 0;
+#if CV_SIMD128
+            v_float32x4 z = v_setzero_f32();
+            for( ; i <= len - 16; i += 16 )
+            {
+                v_float32x4 x0 = v_load(srcptr + i);
+                v_float32x4 x1 = v_load(srcptr + i + 4);
+                v_float32x4 x2 = v_load(srcptr + i + 8);
+                v_float32x4 x3 = v_load(srcptr + i + 12);
+                v_float32x4 s0 = v_load(scaleptr + i);
+                v_float32x4 s1 = v_load(scaleptr + i + 4);
+                v_float32x4 s2 = v_load(scaleptr + i + 8);
+                v_float32x4 s3 = v_load(scaleptr + i + 12);
+                x0 = v_select(x0 >= z, x0, x0*s0);
+                x1 = v_select(x1 >= z, x1, x1*s1);
+                x2 = v_select(x2 >= z, x2, x2*s2);
+                x3 = v_select(x3 >= z, x3, x3*s3);
+                v_store(dstptr + i, x0);
+                v_store(dstptr + i + 4, x1);
+                v_store(dstptr + i + 8, x2);
+                v_store(dstptr + i + 12, x3);
+            }
+#endif
+            for( ; i < len; i++ )
+            {
+                float x = srcptr[i];
+                float s = scaleptr[i];
+                dstptr[i] = x >= 0.f ? x : s*x;
+            }
+        }
+    }
+
+#ifdef HAVE_DNN_NGRAPH
+    std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
+    {
+        auto shape = getShape<size_t>(scale);
+        auto slope = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, shape, scale.ptr<float>());
+        return std::make_shared<ngraph::op::PRelu>(node, slope);
+    }
+#endif  // HAVE_DNN_NGRAPH
+};
+
 struct SignFunctor : public BaseDefaultFunctor<SignFunctor>
 {
     typedef SignLayer Layer;
```
```diff
@@ -3040,13 +3115,26 @@ Ptr<ExpLayer> ExpLayer::create(const LayerParams& params)
 Ptr<Layer> ChannelsPReLULayer::create(const LayerParams& params)
 {
     CV_Assert(params.blobs.size() == 1);
-    if (params.blobs[0].total() == 1)
+    Mat scale = params.blobs[0];
+    float slope = *scale.ptr<float>();
+    if (scale.total() == 1 || countNonZero(scale != slope) == 0)
     {
         LayerParams reluParams = params;
-        reluParams.set("negative_slope", *params.blobs[0].ptr<float>());
+        reluParams.set("negative_slope", slope);
         return ReLULayer::create(reluParams);
     }
-    Ptr<ChannelsPReLULayer> l(new ElementWiseLayer<ChannelsPReLUFunctor>(ChannelsPReLUFunctor(params.blobs[0])));
+
+    Ptr<Layer> l;
+    // Check first two dimensions of scale (batch, channels)
+    MatShape scaleShape = shape(scale);
+    if (std::count_if(scaleShape.begin(), scaleShape.end(), [](int d){ return d != 1;}) > 1)
+    {
+        l = new ElementWiseLayer<PReLUFunctor>(PReLUFunctor(scale));
+    }
+    else
+    {
+        l = new ElementWiseLayer<ChannelsPReLUFunctor>(ChannelsPReLUFunctor(scale));
+    }
     l->setParamsFrom(params);
 
     return l;
```
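A hedged usage sketch of the factory logic above: the shape of `blobs[0]` handed to `ChannelsPReLULayer::create` decides which functor backs the layer, and more than one non-unit dimension selects the new element-wise path. `LayerParams` and `ChannelsPReLULayer::create` are real OpenCV dnn API; the names, shapes, and slope values below are illustrative only.

```cpp
#include <opencv2/dnn.hpp>
#include <vector>

using namespace cv;
using namespace cv::dnn;

Ptr<Layer> makeExamplePRelu(bool elementwise)
{
    LayerParams lp;
    lp.type = "PReLU";
    lp.name = "prelu_example";

    Mat slopes;
    if (elementwise)
    {
        // More than one non-unit dimension -> ElementWiseLayer<PReLUFunctor> (per-element slopes).
        slopes.create(std::vector<int>{1, 3, 4, 4}, CV_32F);
        for (int i = 0; i < (int)slopes.total(); ++i)
            slopes.ptr<float>()[i] = 0.01f * (i + 1);   // distinct values, so no leaky-ReLU shortcut
    }
    else
    {
        // Only the channel dimension differs from 1 -> ElementWiseLayer<ChannelsPReLUFunctor>.
        float perChannel[] = {0.1f, 0.2f, 0.3f};
        slopes = Mat(std::vector<int>{1, 3, 1, 1}, CV_32F, perChannel).clone();
    }

    lp.blobs.assign(1, slopes);
    return ChannelsPReLULayer::create(lp);
}
```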
```diff
@@ -73,14 +73,23 @@ __kernel void ReLU6Forward(const int count, __global const T* in, __global T* ou
     }
 }
 
+__kernel void ChannelsPReLUForward(const int count, const int channels, const int plane_size,
+                                   __global const T* in, __global T* out,
+                                   __global const KERNEL_ARG_DTYPE* slope_data)
+{
+    int index = get_global_id(0);
+    int c = (index / plane_size) % channels;
+    if(index < count)
+        out[index] = in[index] > 0 ? in[index] : in[index] * slope_data[c];
+}
+
 __kernel void PReLUForward(const int count, const int channels, const int plane_size,
                            __global const T* in, __global T* out,
                            __global const KERNEL_ARG_DTYPE* slope_data)
 {
     int index = get_global_id(0);
-    int c = (index / plane_size) % channels;
     if(index < count)
-        out[index] = in[index] > 0 ? in[index] : in[index] * slope_data[c];
+        out[index] = in[index] > 0 ? in[index] : in[index] * slope_data[index];
 }
 
 __kernel void TanHForward(const int count, __global T* in, __global T* out) {
@@ -352,4 +361,4 @@ __kernel void ReciprocalForward(const int n, __global T* in, __global T* out)
     int index = get_global_id(0);
     if(index < n)
         out[index] = 1.0f/in[index];
-}
\ No newline at end of file
+}
```
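For orientation, a sketch of how such a kernel is bound and launched from the host with OpenCV's `ocl::Kernel` API. The first three arguments mirror the `kernel.set(0..2)` calls shown earlier in this diff; the remaining argument order is an assumption, not quoted from the patch, and the snippet only builds inside the dnn module where `ocl::dnn::activations_oclsrc` is available.

```cpp
// Hedged sketch: launch one of the PReLU kernels by name (the functor's oclKernelName member
// added by this patch). Argument indices 3..5 use an assumed layout (input, output, slopes).
bool runPReluKernel(const cv::String& kernelName, const cv::String& buildopt,
                    const cv::UMat& src, cv::UMat& dst, const cv::UMat& slopes)
{
    cv::ocl::Kernel kernel(kernelName.c_str(), cv::ocl::dnn::activations_oclsrc, buildopt);
    if (kernel.empty())
        return false;

    kernel.set(0, (int)src.total());                                   // count
    kernel.set(1, (int)src.size[1]);                                   // channels
    kernel.set(2, (int)(src.total() / src.size[0] / src.size[1]));     // plane size (H*W)
    kernel.set(3, cv::ocl::KernelArg::PtrReadOnly(src));
    kernel.set(4, cv::ocl::KernelArg::PtrWriteOnly(dst));
    kernel.set(5, cv::ocl::KernelArg::PtrReadOnly(slopes));

    size_t globalSize = src.total();
    return kernel.run(1, &globalSize, nullptr, false);
}
```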
```diff
@@ -589,6 +589,7 @@ private:
     void parsePack           (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
     void parseClipByValue    (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
     void parseLeakyRelu      (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parsePReLU          (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
     void parseActivation     (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
     void parseExpandDims     (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
     void parseSquare         (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
@@ -668,6 +669,7 @@ TFImporter::DispatchMap TFImporter::buildDispatchMap()
     dispatch["Pack"] = &TFImporter::parsePack;
     dispatch["ClipByValue"] = &TFImporter::parseClipByValue;
     dispatch["LeakyRelu"] = &TFImporter::parseLeakyRelu;
+    dispatch["PReLU"] = &TFImporter::parsePReLU;
     dispatch["Abs"] = dispatch["Tanh"] = dispatch["Sigmoid"] = dispatch["Relu"] =
             dispatch["Elu"] = dispatch["Exp"] = dispatch["Identity"] = dispatch["Relu6"] = &TFImporter::parseActivation;
     dispatch["ExpandDims"] = &TFImporter::parseExpandDims;
@@ -2622,6 +2624,27 @@ void TFImporter::parseLeakyRelu(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
     connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
 }
 
+void TFImporter::parsePReLU(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
+{
+    const std::string& name = layer.name();
+
+    Mat scales;
+    blobFromTensor(getConstBlob(layer, value_id, 1), scales);
+
+    layerParams.blobs.resize(1);
+
+    if (scales.dims == 3) {
+        // Considering scales from Keras with HWC layout
+        transposeND(scales, {2, 0, 1}, layerParams.blobs[0]);
+    } else {
+        layerParams.blobs[0] = scales;
+    }
+
+    int id = dstNet.addLayer(name, "PReLU", layerParams);
+    layer_id[name] = id;
+    connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+}
+
 // "Abs" "Tanh" "Sigmoid" "Relu" "Elu" "Exp" "Identity" "Relu6"
 void TFImporter::parseActivation(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
 {
```
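The 3-D branch in `parsePReLU` above covers slope tensors exported from Keras, which stores per-element PReLU slopes in HWC order; OpenCV dnn blobs are CHW, hence the `{2, 0, 1}` permutation. A small standalone illustration using `cv::transposeND` (shapes are made up for the example):

```cpp
#include <opencv2/core.hpp>
#include <iostream>
#include <vector>

int main()
{
    // A hypothetical 4x5x3 (HxWxC) slope tensor as Keras would store it.
    std::vector<int> hwcShape = {4, 5, 3};
    cv::Mat hwc(hwcShape, CV_32F, cv::Scalar(0.25));

    // Reorder to 3x4x5 (CxHxW), the layout OpenCV dnn blobs use.
    cv::Mat chw;
    cv::transposeND(hwc, {2, 0, 1}, chw);

    std::cout << chw.size[0] << "x" << chw.size[1] << "x" << chw.size[2] << std::endl; // prints 3x4x5
    return 0;
}
```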
```diff
@@ -1675,6 +1675,7 @@ TEST_P(Test_TensorFlow_layers, clip_by_value)
 
 TEST_P(Test_TensorFlow_layers, tf2_prelu)
 {
+    double l1 = 0, lInf = 0;
     if (backend == DNN_BACKEND_CUDA)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported; only across channels is supported
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
@@ -1686,6 +1687,11 @@ TEST_P(Test_TensorFlow_layers, tf2_prelu)
         applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
                      CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION
         );
+#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2023000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) {
+        l1 = 1e-4;
+        lInf = 1e-3;
+    }
 #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
     {
@@ -1705,7 +1711,7 @@ TEST_P(Test_TensorFlow_layers, tf2_prelu)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif
 
-    runTensorFlowNet("tf2_prelu");
+    runTensorFlowNet("tf2_prelu", false, l1, lInf);
 }
 
 TEST_P(Test_TensorFlow_layers, tf2_permute_nhwc_ncwh)
```