Merge pull request #24056 from dkurt:eltwise_prelu

PReLU with element-wise scales #24056

### Pull Request Readiness Checklist

resolves https://github.com/opencv/opencv/issues/24051
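
In short, this adds PReLU support where the learned slope varies per element (e.g. per spatial position), in addition to the existing per-channel slopes. A minimal reference of the intended semantics (my sketch, with a hypothetical helper name, not code from the patch):

```cpp
#include <opencv2/core.hpp>

// Element-wise PReLU reference: out[i] = in[i] >= 0 ? in[i] : slope[i] * in[i],
// where `slope` has one value per element of `in` (not one per channel).
static void preluElementwiseRef(const cv::Mat& in, const cv::Mat& slope, cv::Mat& out)
{
    CV_Assert(in.type() == CV_32F && slope.type() == CV_32F);
    CV_Assert(in.isContinuous() && slope.isContinuous() && in.total() == slope.total());
    out.create(in.dims, in.size.p, CV_32F);
    const float* x = in.ptr<float>();
    const float* s = slope.ptr<float>();
    float* y = out.ptr<float>();
    for (size_t i = 0; i < in.total(); i++)
        y[i] = x[i] >= 0.f ? x[i] : s[i] * x[i];
}
```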

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
Dmitry Kurtaev 2023-07-27 16:36:40 +03:00 committed by GitHub
parent ab6bffc6f8
commit 677a28fd2a
4 changed files with 141 additions and 15 deletions


@@ -138,7 +138,7 @@ public:
{
const float* srcptr = src_->ptr<float>(i) + stripeStart;
float* dstptr = dst_->ptr<float>(i) + stripeStart;
func_->apply(srcptr, dstptr, (int)(stripeEnd - stripeStart), planeSize, 0, outCn);
func_->apply(srcptr, dstptr, stripeStart, (int)(stripeEnd - stripeStart), planeSize, 0, outCn);
}
}
};
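
For orientation (my reading of the change, not wording from the patch): `apply()` now receives the stripe's offset inside the plane, because an element-wise functor must know the absolute position of each value to look up its per-element slope; functors that do not need it ignore the argument, and `forwardSlice` passes -1. A toy index calculation under assumed sizes (the `slopeIndex` helper is hypothetical):

```cpp
// Toy numbers, illustrative only: an 8x8 plane processed in stripes of 16 values.
const int planeSize   = 8 * 8; // elements per channel plane
const int stripeStart = 48;    // offset of this stripe inside the plane
const int cn0         = 2;     // first channel handled by this call

// Element j of the stripe in the k-th processed channel reads its slope from
//   scale[(cn0 + k) * planeSize + stripeStart + j],
// which is how the new PReLUFunctor::apply (below) advances its scale pointer
// in lockstep with srcptr/dstptr.
int slopeIndex(int k, int j) { return (cn0 + k) * planeSize + stripeStart + j; }
```
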
@@ -268,7 +268,7 @@ public:
void forwardSlice(const float* src, float* dst, int len, size_t planeSize, int cn0, int cn1) const CV_OVERRIDE
{
func.apply(src, dst, len, planeSize, cn0, cn1);
func.apply(src, dst, -1, len, planeSize, cn0, cn1);
}
#ifdef HAVE_CUDA
@@ -355,8 +355,9 @@ struct ReLUFunctor : public BaseFunctor
backendId == DNN_BACKEND_CANN;
}
void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
{
CV_UNUSED(stripeStart);
float s = slope;
for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
{
@@ -559,8 +560,9 @@ struct ReLU6Functor : public BaseFunctor
backendId == DNN_BACKEND_CANN;
}
void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
{
CV_UNUSED(stripeStart);
for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
{
int i = 0;
@@ -704,8 +706,9 @@ struct ReLU6Functor : public BaseFunctor
template <class T>
struct BaseDefaultFunctor : public BaseFunctor
{
void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
{
CV_UNUSED(stripeStart);
for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
{
for( int i = 0; i < len; i++ )
@@ -2226,8 +2229,9 @@ struct PowerFunctor : public BaseFunctor
shift = originShift;
}
void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
{
CV_UNUSED(stripeStart);
float a = scale, b = shift, p = power;
if( p == 1.f )
{
@@ -2452,6 +2456,7 @@ struct ChannelsPReLUFunctor : public BaseFunctor
Mat scale;
#ifdef HAVE_OPENCL
UMat scale_umat;
std::string oclKernelName = "ChannelsPReLUForward";
#endif
explicit ChannelsPReLUFunctor(const Mat& scale_=Mat()) : scale(scale_)
@@ -2470,8 +2475,9 @@ struct ChannelsPReLUFunctor : public BaseFunctor
backendId == DNN_BACKEND_CANN;
}
void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
{
CV_UNUSED(stripeStart);
CV_Assert(scale.isContinuous() && scale.type() == CV_32F);
const float* scaleptr = scale.ptr<float>();
@@ -2525,7 +2531,7 @@ struct ChannelsPReLUFunctor : public BaseFunctor
UMat& src = inputs[i];
UMat& dst = outputs[i];
ocl::Kernel kernel("PReLUForward", ocl::dnn::activations_oclsrc, buildopt);
ocl::Kernel kernel(oclKernelName.c_str(), ocl::dnn::activations_oclsrc, buildopt);
kernel.set(0, (int)src.total());
kernel.set(1, (int)src.size[1]);
kernel.set(2, (int)total(shape(src), 2));
@@ -2605,6 +2611,75 @@ struct ChannelsPReLUFunctor : public BaseFunctor
int64 getFLOPSPerElement() const { return 1; }
};
struct PReLUFunctor : public ChannelsPReLUFunctor
{
explicit PReLUFunctor(const Mat& scale_=Mat()) : ChannelsPReLUFunctor(scale_)
{
#ifdef HAVE_OPENCL
oclKernelName = "PReLUForward";
#endif
}
bool supportBackend(int backendId, int)
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CANN ||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
}
void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
{
CV_Assert(scale.isContinuous() && scale.type() == CV_32F);
if (stripeStart < 0)
CV_Error(Error::StsNotImplemented, "PReLUFunctor requires stripe offset parameter");
const float* scaleptr = scale.ptr<float>() + cn0 * planeSize + stripeStart;
for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize, scaleptr += planeSize )
{
int i = 0;
#if CV_SIMD128
v_float32x4 z = v_setzero_f32();
for( ; i <= len - 16; i += 16 )
{
v_float32x4 x0 = v_load(srcptr + i);
v_float32x4 x1 = v_load(srcptr + i + 4);
v_float32x4 x2 = v_load(srcptr + i + 8);
v_float32x4 x3 = v_load(srcptr + i + 12);
v_float32x4 s0 = v_load(scaleptr + i);
v_float32x4 s1 = v_load(scaleptr + i + 4);
v_float32x4 s2 = v_load(scaleptr + i + 8);
v_float32x4 s3 = v_load(scaleptr + i + 12);
x0 = v_select(x0 >= z, x0, x0*s0);
x1 = v_select(x1 >= z, x1, x1*s1);
x2 = v_select(x2 >= z, x2, x2*s2);
x3 = v_select(x3 >= z, x3, x3*s3);
v_store(dstptr + i, x0);
v_store(dstptr + i + 4, x1);
v_store(dstptr + i + 8, x2);
v_store(dstptr + i + 12, x3);
}
#endif
for( ; i < len; i++ )
{
float x = srcptr[i];
float s = scaleptr[i];
dstptr[i] = x >= 0.f ? x : s*x;
}
}
}
#ifdef HAVE_DNN_NGRAPH
std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
{
auto shape = getShape<size_t>(scale);
auto slope = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, shape, scale.ptr<float>());
return std::make_shared<ngraph::op::PRelu>(node, slope);
}
#endif // HAVE_DNN_NGRAPH
};
struct SignFunctor : public BaseDefaultFunctor<SignFunctor>
{
typedef SignLayer Layer;
@@ -3040,13 +3115,26 @@ Ptr<ExpLayer> ExpLayer::create(const LayerParams& params)
Ptr<Layer> ChannelsPReLULayer::create(const LayerParams& params)
{
CV_Assert(params.blobs.size() == 1);
if (params.blobs[0].total() == 1)
Mat scale = params.blobs[0];
float slope = *scale.ptr<float>();
if (scale.total() == 1 || countNonZero(scale != slope) == 0)
{
LayerParams reluParams = params;
reluParams.set("negative_slope", *params.blobs[0].ptr<float>());
reluParams.set("negative_slope", slope);
return ReLULayer::create(reluParams);
}
Ptr<ChannelsPReLULayer> l(new ElementWiseLayer<ChannelsPReLUFunctor>(ChannelsPReLUFunctor(params.blobs[0])));
Ptr<Layer> l;
// Check first two dimensions of scale (batch, channels)
MatShape scaleShape = shape(scale);
if (std::count_if(scaleShape.begin(), scaleShape.end(), [](int d){ return d != 1;}) > 1)
{
l = new ElementWiseLayer<PReLUFunctor>(PReLUFunctor(scale));
}
else
{
l = new ElementWiseLayer<ChannelsPReLUFunctor>(ChannelsPReLUFunctor(scale));
}
l->setParamsFrom(params);
return l;
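
To illustrate the dispatch above (a hypothetical usage sketch; shapes and values are mine, not from the patch): a scale blob with a single non-unit dimension keeps the per-channel path, a blob that varies along more than one dimension selects the new element-wise functor, and a constant blob still collapses to plain ReLU.

```cpp
#include <opencv2/dnn.hpp>
using namespace cv;
using namespace cv::dnn;

int main()
{
    LayerParams lp;

    // Per-channel slopes, shape 1x3x1x1 (one non-unit dimension):
    // expected to pick the ChannelsPReLUFunctor path.
    float chData[] = {0.1f, 0.2f, 0.3f};
    int chShape[] = {1, 3, 1, 1};
    lp.blobs.assign(1, Mat(4, chShape, CV_32F, chData).clone());
    Ptr<Layer> perChannel = ChannelsPReLULayer::create(lp);

    // Element-wise slopes, shape 1x1x2x2 (two non-unit dimensions):
    // expected to pick the new PReLUFunctor path.
    float elData[] = {0.1f, 0.2f, 0.3f, 0.4f};
    int elShape[] = {1, 1, 2, 2};
    lp.blobs.assign(1, Mat(4, elShape, CV_32F, elData).clone());
    Ptr<Layer> elementWise = ChannelsPReLULayer::create(lp);

    return 0;
}
```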


@@ -73,14 +73,23 @@ __kernel void ReLU6Forward(const int count, __global const T* in, __global T* ou
}
}
__kernel void ChannelsPReLUForward(const int count, const int channels, const int plane_size,
__global const T* in, __global T* out,
__global const KERNEL_ARG_DTYPE* slope_data)
{
int index = get_global_id(0);
int c = (index / plane_size) % channels;
if(index < count)
out[index] = in[index] > 0 ? in[index] : in[index] * slope_data[c];
}
__kernel void PReLUForward(const int count, const int channels, const int plane_size,
__global const T* in, __global T* out,
__global const KERNEL_ARG_DTYPE* slope_data)
{
int index = get_global_id(0);
int c = (index / plane_size) % channels;
if(index < count)
out[index] = in[index] > 0 ? in[index] : in[index] * slope_data[c];
out[index] = in[index] > 0 ? in[index] : in[index] * slope_data[index];
}
__kernel void TanHForward(const int count, __global T* in, __global T* out) {
@@ -352,4 +361,4 @@ __kernel void ReciprocalForward(const int n, __global T* in, __global T* out)
int index = get_global_id(0);
if(index < n)
out[index] = 1.0f/in[index];
}
}
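
For comparison, the ChannelsPReLUForward and new PReLUForward kernels above differ only in how the slope is indexed. A host-side C++ mirror of that logic (illustrative only; `preluForwardRef` is not part of the patch):

```cpp
// CPU mirror of the two OpenCL kernels above.
void preluForwardRef(int count, int channels, int plane_size,
                     const float* in, float* out, const float* slope_data,
                     bool element_wise)
{
    for (int index = 0; index < count; index++)
    {
        // ChannelsPReLUForward: one slope per channel -> slope_data[c].
        // PReLUForward:         one slope per element -> slope_data[index].
        int c = (index / plane_size) % channels;
        float s = element_wise ? slope_data[index] : slope_data[c];
        out[index] = in[index] > 0 ? in[index] : in[index] * s;
    }
}
```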


@@ -589,6 +589,7 @@ private:
void parsePack (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
void parseClipByValue (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
void parseLeakyRelu (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
void parsePReLU (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
void parseActivation (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
void parseExpandDims (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
void parseSquare (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
@@ -668,6 +669,7 @@ TFImporter::DispatchMap TFImporter::buildDispatchMap()
dispatch["Pack"] = &TFImporter::parsePack;
dispatch["ClipByValue"] = &TFImporter::parseClipByValue;
dispatch["LeakyRelu"] = &TFImporter::parseLeakyRelu;
dispatch["PReLU"] = &TFImporter::parsePReLU;
dispatch["Abs"] = dispatch["Tanh"] = dispatch["Sigmoid"] = dispatch["Relu"] =
dispatch["Elu"] = dispatch["Exp"] = dispatch["Identity"] = dispatch["Relu6"] = &TFImporter::parseActivation;
dispatch["ExpandDims"] = &TFImporter::parseExpandDims;
@@ -2622,6 +2624,27 @@ void TFImporter::parseLeakyRelu(tensorflow::GraphDef& net, const tensorflow::Nod
connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
}
void TFImporter::parsePReLU(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
{
const std::string& name = layer.name();
Mat scales;
blobFromTensor(getConstBlob(layer, value_id, 1), scales);
layerParams.blobs.resize(1);
if (scales.dims == 3) {
// Scales from Keras come in HWC layout; transpose them to CHW.
transposeND(scales, {2, 0, 1}, layerParams.blobs[0]);
} else {
layerParams.blobs[0] = scales;
}
int id = dstNet.addLayer(name, "PReLU", layerParams);
layer_id[name] = id;
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
}
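
The parsePReLU importer above reorders a 3-D Keras slope tensor from HWC to CHW so it matches OpenCV's NCHW activations. A small sketch of that reordering (sizes are illustrative, not taken from the patch):

```cpp
#include <opencv2/core.hpp>
using namespace cv;

int main()
{
    // A Keras-style PReLU "alpha" stored as H x W x C.
    int hwc[] = {5, 5, 3};
    Mat alphaHWC(3, hwc, CV_32F, Scalar(0.25));

    // Reorder axes to C x H x W, as the importer does before attaching the blob.
    Mat alphaCHW;
    transposeND(alphaHWC, {2, 0, 1}, alphaCHW);  // alphaCHW is now 3 x 5 x 5

    return 0;
}
```
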
// "Abs" "Tanh" "Sigmoid" "Relu" "Elu" "Exp" "Identity" "Relu6"
void TFImporter::parseActivation(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
{


@@ -1675,6 +1675,7 @@ TEST_P(Test_TensorFlow_layers, clip_by_value)
TEST_P(Test_TensorFlow_layers, tf2_prelu)
{
double l1 = 0, lInf = 0;
if (backend == DNN_BACKEND_CUDA)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported; only across channels is supported
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
@@ -1686,6 +1687,11 @@ TEST_P(Test_TensorFlow_layers, tf2_prelu)
applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION
);
#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2023000000)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) {
l1 = 1e-4;
lInf = 1e-3;
}
#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
@@ -1705,7 +1711,7 @@ TEST_P(Test_TensorFlow_layers, tf2_prelu)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
#endif
runTensorFlowNet("tf2_prelu");
runTensorFlowNet("tf2_prelu", false, l1, lInf);
}
TEST_P(Test_TensorFlow_layers, tf2_permute_nhwc_ncwh)