ReLU6 layer ocl support

Include relu6 ocl kernel and layer fusion support.

Signed-off-by: Li Peng <peng.li@intel.com>
commit 2863f950d6 (parent 0e4eed0ba1)
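
For reference, the element-wise math this commit brings to the OpenCL path, as a minimal host-side C++ sketch (the defaults of 0 and 6 follow ReLU6Layer::create below; the helper name is ours, not code from the commit):

    #include <algorithm>

    // Scalar reference for the new kernel: clamp x into [minValue, maxValue].
    // Standard ReLU6 is the special case minValue = 0, maxValue = 6.
    static inline float relu6_ref(float x, float minValue = 0.f, float maxValue = 6.f)
    {
        return std::min(std::max(x, minValue), maxValue);
    }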
@@ -406,6 +406,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     class CV_EXPORTS ReLU6Layer : public ActivationLayer
     {
     public:
+        float minValue, maxValue;
+
         static Ptr<ReLU6Layer> create(const LayerParams &params);
     };
 
@@ -1439,6 +1439,7 @@ struct Net::Impl
                  nextData &&
                  ((nextData->type == "ReLU") ||
                   (nextData->type == "ChannelsPReLU") ||
+                  (nextData->type == "ReLU6") ||
                   (nextData->type == "TanH") ||
                   (nextData->type == "Power"))) )
             {
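
The check above is what gates convolution+activation fusion; condensed into a standalone predicate (a sketch with a name of our own, not code from this commit):

    #include <string>

    // A convolution may absorb its consumer only when the consumer is one of
    // the activation types the fused OCL4DNN kernels know how to apply.
    static bool isFusableActivation(const std::string& type)
    {
        return type == "ReLU" || type == "ChannelsPReLU" || type == "ReLU6" ||
               type == "TanH" || type == "Power";
    }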
@@ -860,6 +860,15 @@ public:
                 activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
             }
 
+            Ptr<ReLU6Layer> activ_relu6 = activ.dynamicCast<ReLU6Layer>();
+            if( !activ_relu6.empty() )
+            {
+                reluslope.resize(2);
+                reluslope[0] = activ_relu6->minValue;
+                reluslope[1] = activ_relu6->maxValue;
+                activType = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
+            }
+
             Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
             if( !activ_chprelu.empty() )
             {
@@ -906,12 +915,17 @@ public:
             {
                 convolutionOp->setActivTanh(true);
             }
+            else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_RELU6)
+            {
+                convolutionOp->setActivReLU6(true, reluslope[0], reluslope[1]);
+            }
             else
             {
                 convolutionOp->setActivReLU(false, 0);
                 convolutionOp->setActivPReLU(false, reluslope);
                 convolutionOp->setActivPower(false, 1.f);
                 convolutionOp->setActivTanh(false);
+                convolutionOp->setActivReLU6(false, 0, 0);
             }
             newActiv = false;
         }
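
Note how reluslope doubles as a generic parameter carrier here; an illustrative summary (our annotation, not code from the commit):

    #include <vector>

    // What reluslope holds per fused activation (illustrative):
    //   ReLU          -> a single negative slope
    //   ChannelsPReLU -> one slope per channel
    //   ReLU6         -> { minValue, maxValue }, unpacked into
    //                    setActivReLU6(true, reluslope[0], reluslope[1]) above
    std::vector<float> reluslope = { 0.f, 6.f };  // ReLU6 with default bounds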
@@ -381,8 +381,30 @@ struct ReLU6Functor
 #ifdef HAVE_OPENCL
     bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
     {
-        // TODO: implement OCL version
-        return false;
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inps.getUMatVector(inputs);
+        outs.getUMatVector(outputs);
+        String buildopt = oclGetTMacro(inputs[0]);
+
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            UMat& src = inputs[i];
+            UMat& dst = outputs[i];
+
+            ocl::Kernel kernel("ReLU6Forward", ocl::dnn::activations_oclsrc, buildopt);
+            kernel.set(0, (int)src.total());
+            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
+            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+            kernel.set(3, (float)minValue);
+            kernel.set(4, (float)maxValue);
+
+            size_t gSize = src.total();
+            CV_Assert(kernel.run(1, &gSize, NULL, false));
+        }
+
+        return true;
     }
 #endif
 
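
The five kernel.set calls line up positionally with the ReLU6Forward signature added to activations.cl further down (count, in, out, minValue, maxValue). A CPU reference for what one launch computes may help when validating this path; a hypothetical helper, not part of the commit:

    #include <opencv2/core.hpp>
    #include <algorithm>

    // Mirrors ReLU6Forward: one "work-item" per element, clamped into
    // [minValue, maxValue]. The kernel's `index < count` guard plays the
    // same role as this loop bound.
    static void relu6ForwardRef(const cv::Mat& src, cv::Mat& dst,
                                float minValue, float maxValue)
    {
        CV_Assert(src.type() == CV_32F && src.isContinuous());
        dst.create(src.dims, src.size.p, CV_32F);
        const float* in = src.ptr<float>();
        float* out = dst.ptr<float>();
        for (size_t i = 0; i < src.total(); i++)
            out[i] = std::min(std::max(in[i], minValue), maxValue);
    }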
@@ -867,6 +889,9 @@ Ptr<ReLU6Layer> ReLU6Layer::create(const LayerParams& params)
     float maxValue = params.get<float>("max_value", 6.0f);
     Ptr<ReLU6Layer> l(new ElementWiseLayer<ReLU6Functor>(ReLU6Functor(minValue, maxValue)));
     l->setParamsFrom(params);
+    l->minValue = minValue;
+    l->maxValue = maxValue;
+
     return l;
 }
 
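
Hypothetical usage of the factory, showing how the min_value/max_value params reach the layer (the min_value default of 0 is an assumption mirroring max_value; only the max_value default is visible in this hunk):

    #include <opencv2/dnn.hpp>

    void relu6CreateExample()
    {
        cv::dnn::LayerParams lp;
        lp.name = "relu6";
        lp.type = "ReLU6";
        lp.set("min_value", 0.0f);   // assumed default
        lp.set("max_value", 6.0f);
        cv::Ptr<cv::dnn::ReLU6Layer> layer = cv::dnn::ReLU6Layer::create(lp);
        // After this commit the bounds are public members, which is what the
        // convolution fusion code reads: layer->minValue, layer->maxValue.
    }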
@@ -78,7 +78,8 @@ typedef enum {
     OCL4DNN_CONV_FUSED_ACTIV_RELU = 1,
     OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2,
     OCL4DNN_CONV_FUSED_ACTIV_POWER = 3,
-    OCL4DNN_CONV_FUSED_ACTIV_TANH = 4
+    OCL4DNN_CONV_FUSED_ACTIV_TANH = 4,
+    OCL4DNN_CONV_FUSED_ACTIV_RELU6 = 5
 } ocl4dnnFusedActiv_t;
 
 template<typename Dtype>
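
As this commit does for RELU6, a fused activation has to be registered in four places; a hypothetical summary of the pattern, not code from the repo:

    // The four registration points for a fused activation:
    enum Ocl4dnnFusedActivStep
    {
        STEP_ENUM_VALUE,    // ocl4dnnFusedActiv_t (this hunk)
        STEP_BUILD_DEFINE,  // setFusionDefine -> addDef("FUSED_CONV_*", 1)
        STEP_RUNTIME_ARGS,  // setFusionArg   -> kernel.set(argIdx++, ...)
        STEP_KERNEL_MACRO   // conv_layer_spatial.cl #elif branch (below)
    };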
@@ -96,6 +97,7 @@ class OCL4DNNConvSpatial
         void setActivPReLU(bool fuse_activ, std::vector<float> &slope);
         void setActivPower(bool fuse_activ, float power);
         void setActivTanh(bool fuse_activ);
+        void setActivReLU6(bool fuse_activ, float min, float max);
         void setBias(bool bias_term);
 
     private:
@@ -319,6 +321,8 @@ class OCL4DNNConvSpatial
         cv::ocl::ProgramSource src_;
         int32_t prev_kernel_type_;
         float negative_slope_;
+        float min_value_;
+        float max_value_;
         UMat negative_slope_umat_;
         ocl4dnnFusedActiv_t fused_activ_;
         float power_;
@@ -82,6 +82,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
     fused_eltwise_ = false;
     power_ = 1.f;
     negative_slope_ = 0;
+    min_value_ = 0;
+    max_value_ = 0;
     prev_kernel_type_ = -1;
     tuned_ = false;
 
@@ -162,6 +164,9 @@ void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ,
         case OCL4DNN_CONV_FUSED_ACTIV_TANH:
             addDef("FUSED_CONV_TANH", 1);
             break;
+        case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
+            addDef("FUSED_CONV_RELU6", 1);
+            break;
         default:
             ;
     }
@@ -184,6 +189,10 @@ void OCL4DNNConvSpatial<Dtype>::setFusionArg(ocl4dnnFusedActiv_t fused_activ, bo
         case OCL4DNN_CONV_FUSED_ACTIV_POWER:
             kernel.set(argIdx++, (float)power_);
             break;
+        case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
+            kernel.set(argIdx++, (float)min_value_);
+            kernel.set(argIdx++, (float)max_value_);
+            break;
         default:
             ;
     }
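
setFusionDefine selects the activation branch at kernel build time; setFusionArg pushes the matching scalars at launch time, and the push order must agree with the FUSED_ARG parameter order declared in conv_layer_spatial.cl (min before max), or the kernel reads garbage. A toy sanity check of that contract, with invented names:

    #include <cassert>
    #include <vector>

    // Stand-in for ocl::Kernel::set(int, float): records arguments by index.
    struct MockKernel
    {
        std::vector<float> args;
        void set(int idx, float v)
        {
            if (idx >= (int)args.size()) args.resize(idx + 1);
            args[idx] = v;
        }
    };

    int main()
    {
        MockKernel kernel;
        int argIdx = 0;
        float min_value_ = 0.f, max_value_ = 6.f;
        kernel.set(argIdx++, min_value_);  // must precede max, as in FUSED_ARG
        kernel.set(argIdx++, max_value_);
        assert(kernel.args[0] == 0.f && kernel.args[1] == 6.f);
        return 0;
    }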
@@ -393,6 +402,19 @@ void OCL4DNNConvSpatial<Dtype>::setActivReLU(bool fuse_activ, float slope)
         fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
 }
 
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setActivReLU6(bool fuse_activ, float min, float max)
+{
+    if ( fuse_activ )
+    {
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
+        min_value_ = min;
+        max_value_ = max;
+    }
+    else
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+}
+
 template<typename Dtype>
 void OCL4DNNConvSpatial<Dtype>::setActivPReLU(bool fuse_activ, std::vector<float> &slope)
 {
@@ -54,6 +54,17 @@ __kernel void ReLUForward(const int count, __global const T* in, __global T* out
 #endif
 }
 
+__kernel void ReLU6Forward(const int count, __global const T* in, __global T* out,
+                           const T minValue, const T maxValue)
+{
+    int index = get_global_id(0);
+    if(index < count)
+    {
+        T x = in[index];
+        out[index] = clamp(x, minValue, maxValue);
+    }
+}
+
 __kernel void PReLUForward(const int count, const int channels, const int plane_size,
                            __global const T* in, __global T* out, __global const T* slope_data)
 {
@@ -48,19 +48,22 @@
 
 #if defined(FUSED_CONV_RELU)
 #define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope)))
-#define NEGATIVE_SLOPE_ARG Dtype negative_slope,
+#define FUSED_ARG Dtype negative_slope,
 #elif defined(FUSED_CONV_PRELU)
 #define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope[c])))
-#define NEGATIVE_SLOPE_ARG __global const Dtype *negative_slope,
+#define FUSED_ARG __global const Dtype *negative_slope,
 #elif defined(FUSED_CONV_POWER)
 #define ACTIVATION_RELU_FUNCTION(x, c) pow(x, power)
-#define NEGATIVE_SLOPE_ARG Dtype power,
+#define FUSED_ARG Dtype power,
 #elif defined(FUSED_CONV_TANH)
 #define ACTIVATION_RELU_FUNCTION(x, c) tanh(x)
-#define NEGATIVE_SLOPE_ARG
+#define FUSED_ARG
+#elif defined(FUSED_CONV_RELU6)
+#define ACTIVATION_RELU_FUNCTION(x, c) (clamp((Dtype)(x), min_value, max_value))
+#define FUSED_ARG Dtype min_value, Dtype max_value,
 #else
 #define ACTIVATION_RELU_FUNCTION(x, c) (x)
-#define NEGATIVE_SLOPE_ARG
+#define FUSED_ARG
 #endif
 
 #ifdef FUSED_CONV_ELTWISE
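
Because every fused variant now funnels its extra parameters through the single FUSED_ARG macro, adding the two ReLU6 scalars only required renaming NEGATIVE_SLOPE_ARG at each kernel signature, as the four hunks below show. A host-compilable mock of the scheme, with std::min/std::max standing in for OpenCL's clamp (an illustration, not the actual device code):

    #include <algorithm>
    #include <cstdio>

    typedef float Dtype;
    #define FUSED_CONV_RELU6   // pretend this branch was selected at build time

    #if defined(FUSED_CONV_RELU6)
    #define ACTIVATION_RELU_FUNCTION(x, c) (std::min(std::max((Dtype)(x), min_value), max_value))
    #define FUSED_ARG Dtype min_value, Dtype max_value,
    #endif

    // FUSED_ARG splices the activation's parameters into the front of a kernel
    // signature; with zero parameters (e.g. TanH) it expands to nothing.
    Dtype fused_output(FUSED_ARG Dtype conv_result)
    {
        return ACTIVATION_RELU_FUNCTION(conv_result, 0);
    }

    int main()
    {
        std::printf("%f\n", fused_output(0.f, 6.f, 7.5f));  // prints 6.000000
        return 0;
    }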
@@ -108,7 +111,7 @@
 
 __kernel void ConvolveBasic(
     ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
     __global Dtype* image_data,
     int image_offset,
     __global Dtype* kernel_data,
@@ -197,7 +200,7 @@ __attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
 __kernel void
 convolve_simd(
     ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
     __global Dtype* inputs_base,
     filter_qualifier Dtype* weights_base,
     BIAS_KERNEL_ARG
@@ -417,7 +420,7 @@ typedef struct float0 { float s0; } float0; //never used but makes compiler happ
 
 #define GEMM_LIKE_KERNEL_ARGS \
     ELTWISE_DATA_ARG \
-    NEGATIVE_SLOPE_ARG \
+    FUSED_ARG \
     const __global Dtype *src0, \
     const __global Dtype *src1, \
     BIAS_KERNEL_ARG \
@@ -1731,7 +1734,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
 
 __kernel void DWCONV(
     ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
     __global Dtype* image_data,
     __global Dtype* kernel_data,
     BIAS_KERNEL_ARG