Mirror of https://github.com/opencv/opencv.git
Merge pull request #24056 from dkurt:eltwise_prelu
PReLU with element-wise scales #24056

### Pull Request Readiness Checklist

resolves https://github.com/opencv/opencv/issues/24051

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent ab6bffc6f8
commit 677a28fd2a
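For context: channel-wise PReLU (OpenCV's existing `ChannelsPReLU` path) learns one negative slope per channel, while the element-wise variant added by this PR carries one slope per input element. A minimal standalone sketch of the two semantics (plain C++ with illustrative names, not OpenCV code):

```cpp
#include <cstddef>
#include <vector>

// Channel-wise PReLU: one learned slope per channel (slope.size() == channels).
std::vector<float> preluChannelwise(const std::vector<float>& x, const std::vector<float>& slope,
                                    std::size_t channels, std::size_t planeSize)
{
    std::vector<float> y(x.size());
    for (std::size_t c = 0; c < channels; ++c)
        for (std::size_t i = 0; i < planeSize; ++i)
        {
            const float v = x[c * planeSize + i];
            y[c * planeSize + i] = v >= 0.f ? v : slope[c] * v;
        }
    return y;
}

// Element-wise PReLU: one learned slope per input element (slope.size() == x.size()).
std::vector<float> preluElementwise(const std::vector<float>& x, const std::vector<float>& slope)
{
    std::vector<float> y(x.size());
    for (std::size_t i = 0; i < x.size(); ++i)
        y[i] = x[i] >= 0.f ? x[i] : slope[i] * x[i];
    return y;
}
```

When all slopes happen to be equal, the layer degenerates to a single-slope (leaky) ReLU, which is exactly the shortcut taken in `ChannelsPReLULayer::create` further down in this diff.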
```diff
@@ -138,7 +138,7 @@ public:
             {
                 const float* srcptr = src_->ptr<float>(i) + stripeStart;
                 float* dstptr = dst_->ptr<float>(i) + stripeStart;
-                func_->apply(srcptr, dstptr, (int)(stripeEnd - stripeStart), planeSize, 0, outCn);
+                func_->apply(srcptr, dstptr, stripeStart, (int)(stripeEnd - stripeStart), planeSize, 0, outCn);
             }
         }
     };
@@ -268,7 +268,7 @@ public:
 
     void forwardSlice(const float* src, float* dst, int len, size_t planeSize, int cn0, int cn1) const CV_OVERRIDE
     {
-        func.apply(src, dst, len, planeSize, cn0, cn1);
+        func.apply(src, dst, -1, len, planeSize, cn0, cn1);
     }
 
 #ifdef HAVE_CUDA
```
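The two hunks above thread a new `stripeStart` argument through the functor interface: the parallel body forwards the absolute offset of the stripe it processes, while `forwardSlice` passes `-1` because it has no stripe context. Per-channel functors simply ignore the value (see the `CV_UNUSED(stripeStart)` lines below); the new element-wise `PReLUFunctor` needs it to locate the matching slopes inside its scale blob. A hypothetical helper (not part of the patch) spelling out that offset arithmetic:

```cpp
#include <cstddef>

// For CHW data split into stripes, the slope matching element i of the current stripe
// in channel cn lives at scale[cn * planeSize + stripeStart + i].
inline const float* slopeForStripe(const float* scale, int cn, std::size_t planeSize, int stripeStart)
{
    return scale + cn * planeSize + stripeStart;
}
```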
```diff
@@ -355,8 +355,9 @@ struct ReLUFunctor : public BaseFunctor
                backendId == DNN_BACKEND_CANN;
     }
 
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
     {
+        CV_UNUSED(stripeStart);
         float s = slope;
         for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
         {
@@ -559,8 +560,9 @@ struct ReLU6Functor : public BaseFunctor
                backendId == DNN_BACKEND_CANN;
     }
 
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
     {
+        CV_UNUSED(stripeStart);
         for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
         {
             int i = 0;
@@ -704,8 +706,9 @@ struct ReLU6Functor : public BaseFunctor
 template <class T>
 struct BaseDefaultFunctor : public BaseFunctor
 {
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
     {
+        CV_UNUSED(stripeStart);
         for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
         {
             for( int i = 0; i < len; i++ )
@@ -2226,8 +2229,9 @@ struct PowerFunctor : public BaseFunctor
         shift = originShift;
     }
 
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
    {
+        CV_UNUSED(stripeStart);
         float a = scale, b = shift, p = power;
         if( p == 1.f )
         {
```
```diff
@@ -2452,6 +2456,7 @@ struct ChannelsPReLUFunctor : public BaseFunctor
     Mat scale;
 #ifdef HAVE_OPENCL
     UMat scale_umat;
+    std::string oclKernelName = "ChannelsPReLUForward";
 #endif
 
     explicit ChannelsPReLUFunctor(const Mat& scale_=Mat()) : scale(scale_)
@@ -2470,8 +2475,9 @@ struct ChannelsPReLUFunctor : public BaseFunctor
                backendId == DNN_BACKEND_CANN;
     }
 
-    void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
     {
+        CV_UNUSED(stripeStart);
         CV_Assert(scale.isContinuous() && scale.type() == CV_32F);
 
         const float* scaleptr = scale.ptr<float>();
@@ -2525,7 +2531,7 @@ struct ChannelsPReLUFunctor : public BaseFunctor
             UMat& src = inputs[i];
             UMat& dst = outputs[i];
 
-            ocl::Kernel kernel("PReLUForward", ocl::dnn::activations_oclsrc, buildopt);
+            ocl::Kernel kernel(oclKernelName.c_str(), ocl::dnn::activations_oclsrc, buildopt);
             kernel.set(0, (int)src.total());
             kernel.set(1, (int)src.size[1]);
             kernel.set(2, (int)total(shape(src), 2));
```
```diff
@@ -2605,6 +2611,75 @@ struct ChannelsPReLUFunctor : public BaseFunctor
     int64 getFLOPSPerElement() const { return 1; }
 };
 
+struct PReLUFunctor : public ChannelsPReLUFunctor
+{
+    explicit PReLUFunctor(const Mat& scale_=Mat()) : ChannelsPReLUFunctor(scale_)
+    {
+#ifdef HAVE_OPENCL
+        oclKernelName = "PReLUForward";
+#endif
+    }
+
+    bool supportBackend(int backendId, int)
+    {
+        return backendId == DNN_BACKEND_OPENCV ||
+               backendId == DNN_BACKEND_CANN ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
+    }
+
+    void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const
+    {
+        CV_UNUSED(stripeStart);
+        CV_Assert(scale.isContinuous() && scale.type() == CV_32F);
+
+        if (stripeStart < 0)
+            CV_Error(Error::StsNotImplemented, "PReLUFunctor requires stripe offset parameter");
+
+        const float* scaleptr = scale.ptr<float>() + cn0 * planeSize + stripeStart;
+        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize, scaleptr += planeSize )
+        {
+            int i = 0;
+#if CV_SIMD128
+            v_float32x4 z = v_setzero_f32();
+            for( ; i <= len - 16; i += 16 )
+            {
+                v_float32x4 x0 = v_load(srcptr + i);
+                v_float32x4 x1 = v_load(srcptr + i + 4);
+                v_float32x4 x2 = v_load(srcptr + i + 8);
+                v_float32x4 x3 = v_load(srcptr + i + 12);
+                v_float32x4 s0 = v_load(scaleptr + i);
+                v_float32x4 s1 = v_load(scaleptr + i + 4);
+                v_float32x4 s2 = v_load(scaleptr + i + 8);
+                v_float32x4 s3 = v_load(scaleptr + i + 12);
+                x0 = v_select(x0 >= z, x0, x0*s0);
+                x1 = v_select(x1 >= z, x1, x1*s1);
+                x2 = v_select(x2 >= z, x2, x2*s2);
+                x3 = v_select(x3 >= z, x3, x3*s3);
+                v_store(dstptr + i, x0);
+                v_store(dstptr + i + 4, x1);
+                v_store(dstptr + i + 8, x2);
+                v_store(dstptr + i + 12, x3);
+            }
+#endif
+            for( ; i < len; i++ )
+            {
+                float x = srcptr[i];
+                float s = scaleptr[i];
+                dstptr[i] = x >= 0.f ? x : s*x;
+            }
+        }
+    }
+
+#ifdef HAVE_DNN_NGRAPH
+    std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
+    {
+        auto shape = getShape<size_t>(scale);
+        auto slope = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, shape, scale.ptr<float>());
+        return std::make_shared<ngraph::op::PRelu>(node, slope);
+    }
+#endif  // HAVE_DNN_NGRAPH
+};
+
 struct SignFunctor : public BaseDefaultFunctor<SignFunctor>
 {
     typedef SignLayer Layer;
```
```diff
@@ -3040,13 +3115,26 @@ Ptr<ExpLayer> ExpLayer::create(const LayerParams& params)
 Ptr<Layer> ChannelsPReLULayer::create(const LayerParams& params)
 {
     CV_Assert(params.blobs.size() == 1);
-    if (params.blobs[0].total() == 1)
+    Mat scale = params.blobs[0];
+    float slope = *scale.ptr<float>();
+    if (scale.total() == 1 || countNonZero(scale != slope) == 0)
     {
         LayerParams reluParams = params;
-        reluParams.set("negative_slope", *params.blobs[0].ptr<float>());
+        reluParams.set("negative_slope", slope);
         return ReLULayer::create(reluParams);
     }
-    Ptr<ChannelsPReLULayer> l(new ElementWiseLayer<ChannelsPReLUFunctor>(ChannelsPReLUFunctor(params.blobs[0])));
+
+    Ptr<Layer> l;
+    // Check first two dimensions of scale (batch, channels)
+    MatShape scaleShape = shape(scale);
+    if (std::count_if(scaleShape.begin(), scaleShape.end(), [](int d){ return d != 1;}) > 1)
+    {
+        l = new ElementWiseLayer<PReLUFunctor>(PReLUFunctor(scale));
+    }
+    else
+    {
+        l = new ElementWiseLayer<ChannelsPReLUFunctor>(ChannelsPReLUFunctor(scale));
+    }
     l->setParamsFrom(params);
 
     return l;
```
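A hedged usage sketch of the factory logic above: the shape of `blobs[0]` handed to `ChannelsPReLULayer::create` decides which functor backs the layer, and more than one non-unit dimension selects the new element-wise path. `LayerParams` and `ChannelsPReLULayer::create` are real OpenCV dnn API; the names, shapes, and slope values below are illustrative only.

```cpp
#include <opencv2/dnn.hpp>
#include <vector>

using namespace cv;
using namespace cv::dnn;

Ptr<Layer> makeExamplePRelu(bool elementwise)
{
    LayerParams lp;
    lp.type = "PReLU";
    lp.name = "prelu_example";

    Mat slopes;
    if (elementwise)
    {
        // More than one non-unit dimension -> ElementWiseLayer<PReLUFunctor> (per-element slopes).
        slopes.create(std::vector<int>{1, 3, 4, 4}, CV_32F);
        for (int i = 0; i < (int)slopes.total(); ++i)
            slopes.ptr<float>()[i] = 0.01f * (i + 1);   // distinct values, so no leaky-ReLU shortcut
    }
    else
    {
        // Only the channel dimension differs from 1 -> ElementWiseLayer<ChannelsPReLUFunctor>.
        float perChannel[] = {0.1f, 0.2f, 0.3f};
        slopes = Mat(std::vector<int>{1, 3, 1, 1}, CV_32F, perChannel).clone();
    }

    lp.blobs.assign(1, slopes);
    return ChannelsPReLULayer::create(lp);
}
```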
```diff
@@ -73,14 +73,23 @@ __kernel void ReLU6Forward(const int count, __global const T* in, __global T* ou
     }
 }
 
+__kernel void ChannelsPReLUForward(const int count, const int channels, const int plane_size,
+                                   __global const T* in, __global T* out,
+                                   __global const KERNEL_ARG_DTYPE* slope_data)
+{
+    int index = get_global_id(0);
+    int c = (index / plane_size) % channels;
+    if(index < count)
+        out[index] = in[index] > 0 ? in[index] : in[index] * slope_data[c];
+}
+
 __kernel void PReLUForward(const int count, const int channels, const int plane_size,
                            __global const T* in, __global T* out,
                            __global const KERNEL_ARG_DTYPE* slope_data)
 {
     int index = get_global_id(0);
-    int c = (index / plane_size) % channels;
     if(index < count)
-        out[index] = in[index] > 0 ? in[index] : in[index] * slope_data[c];
+        out[index] = in[index] > 0 ? in[index] : in[index] * slope_data[index];
 }
 
 __kernel void TanHForward(const int count, __global T* in, __global T* out) {
@@ -352,4 +361,4 @@ __kernel void ReciprocalForward(const int n, __global T* in, __global T* out)
     int index = get_global_id(0);
     if(index < n)
         out[index] = 1.0f/in[index];
-}
\ No newline at end of file
+}
```
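For orientation, a sketch of how such a kernel is bound and launched from the host with OpenCV's `ocl::Kernel` API. The first three arguments mirror the `kernel.set(0..2)` calls shown earlier in this diff; the remaining argument order is an assumption, not quoted from the patch, and the snippet only builds inside the dnn module where `ocl::dnn::activations_oclsrc` is available.

```cpp
// Hedged sketch: launch one of the PReLU kernels by name (the functor's oclKernelName member
// added by this patch). Argument indices 3..5 use an assumed layout (input, output, slopes).
bool runPReluKernel(const cv::String& kernelName, const cv::String& buildopt,
                    const cv::UMat& src, cv::UMat& dst, const cv::UMat& slopes)
{
    cv::ocl::Kernel kernel(kernelName.c_str(), cv::ocl::dnn::activations_oclsrc, buildopt);
    if (kernel.empty())
        return false;

    kernel.set(0, (int)src.total());                                   // count
    kernel.set(1, (int)src.size[1]);                                   // channels
    kernel.set(2, (int)(src.total() / src.size[0] / src.size[1]));     // plane size (H*W)
    kernel.set(3, cv::ocl::KernelArg::PtrReadOnly(src));
    kernel.set(4, cv::ocl::KernelArg::PtrWriteOnly(dst));
    kernel.set(5, cv::ocl::KernelArg::PtrReadOnly(slopes));

    size_t globalSize = src.total();
    return kernel.run(1, &globalSize, nullptr, false);
}
```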
```diff
@@ -589,6 +589,7 @@ private:
     void parsePack           (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
     void parseClipByValue    (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
     void parseLeakyRelu      (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parsePReLU          (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
     void parseActivation     (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
     void parseExpandDims     (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
     void parseSquare         (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
@@ -668,6 +669,7 @@ TFImporter::DispatchMap TFImporter::buildDispatchMap()
     dispatch["Pack"] = &TFImporter::parsePack;
     dispatch["ClipByValue"] = &TFImporter::parseClipByValue;
     dispatch["LeakyRelu"] = &TFImporter::parseLeakyRelu;
+    dispatch["PReLU"] = &TFImporter::parsePReLU;
     dispatch["Abs"] = dispatch["Tanh"] = dispatch["Sigmoid"] = dispatch["Relu"] =
             dispatch["Elu"] = dispatch["Exp"] = dispatch["Identity"] = dispatch["Relu6"] = &TFImporter::parseActivation;
     dispatch["ExpandDims"] = &TFImporter::parseExpandDims;
@@ -2622,6 +2624,27 @@ void TFImporter::parseLeakyRelu(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
     connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
 }
 
+void TFImporter::parsePReLU(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
+{
+    const std::string& name = layer.name();
+
+    Mat scales;
+    blobFromTensor(getConstBlob(layer, value_id, 1), scales);
+
+    layerParams.blobs.resize(1);
+
+    if (scales.dims == 3) {
+        // Considering scales from Keras with HWC layout
+        transposeND(scales, {2, 0, 1}, layerParams.blobs[0]);
+    } else {
+        layerParams.blobs[0] = scales;
+    }
+
+    int id = dstNet.addLayer(name, "PReLU", layerParams);
+    layer_id[name] = id;
+    connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+}
+
 // "Abs" "Tanh" "Sigmoid" "Relu" "Elu" "Exp" "Identity" "Relu6"
 void TFImporter::parseActivation(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
 {
```
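The 3-D branch in `parsePReLU` above covers slope tensors exported from Keras, which stores per-element PReLU slopes in HWC order; OpenCV dnn blobs are CHW, hence the `{2, 0, 1}` permutation. A small standalone illustration using `cv::transposeND` (shapes are made up for the example):

```cpp
#include <opencv2/core.hpp>
#include <iostream>
#include <vector>

int main()
{
    // A hypothetical 4x5x3 (HxWxC) slope tensor as Keras would store it.
    std::vector<int> hwcShape = {4, 5, 3};
    cv::Mat hwc(hwcShape, CV_32F, cv::Scalar(0.25));

    // Reorder to 3x4x5 (CxHxW), the layout OpenCV dnn blobs use.
    cv::Mat chw;
    cv::transposeND(hwc, {2, 0, 1}, chw);

    std::cout << chw.size[0] << "x" << chw.size[1] << "x" << chw.size[2] << std::endl; // prints 3x4x5
    return 0;
}
```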
```diff
@@ -1675,6 +1675,7 @@ TEST_P(Test_TensorFlow_layers, clip_by_value)
 
 TEST_P(Test_TensorFlow_layers, tf2_prelu)
 {
+    double l1 = 0, lInf = 0;
     if (backend == DNN_BACKEND_CUDA)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported; only across channels is supported
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
@@ -1686,6 +1687,11 @@ TEST_P(Test_TensorFlow_layers, tf2_prelu)
         applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16,
                      CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION
         );
+#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2023000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) {
+        l1 = 1e-4;
+        lInf = 1e-3;
+    }
 #elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
     {
@@ -1705,7 +1711,7 @@ TEST_P(Test_TensorFlow_layers, tf2_prelu)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif
 
-    runTensorFlowNet("tf2_prelu");
+    runTensorFlowNet("tf2_prelu", false, l1, lInf);
 }
 
 TEST_P(Test_TensorFlow_layers, tf2_permute_nhwc_ncwh)
```