Mirror of https://github.com/opencv/opencv.git
Merge pull request #10672 from pengli:dnn

Commit: 8c2563be41

@@ -1190,7 +1190,8 @@ struct Net::Impl
         // TODO: OpenCL target support more fusion styles.
         if ( preferableTarget == DNN_TARGET_OPENCL &&
-             (!cv::ocl::useOpenCL() || ld.layerInstance->type.compare("Convolution")) )
+             (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
+                                        ld.layerInstance->type != "MVN")) )
             continue;

         Ptr<Layer>& currLayer = ld.layerInstance;

@@ -1239,13 +1240,14 @@ struct Net::Impl
                 }
             }

-            // For now, OpenCL target only support fusion with activation of ReLU/ChannelsPReLU/Power
+            // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
             if ( preferableTarget != DNN_TARGET_OPENCL ||
                  (preferableTarget == DNN_TARGET_OPENCL &&
                   nextData &&
-                  (!nextData->type.compare("ReLU") ||
-                   !nextData->type.compare("ChannelsPReLU") ||
-                   !nextData->type.compare("Power"))) )
+                  ((nextData->type == "ReLU") ||
+                   (nextData->type == "ChannelsPReLU") ||
+                   (nextData->type == "TanH") ||
+                   (nextData->type == "Power"))) )
             {

                 Ptr<ActivationLayer> nextActivLayer;

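A note on the predicate rewrites in the two hunks above: std::string::compare() returns 0 when the
strings match, so the old !nextData->type.compare("ReLU") idiom already meant "type equals ReLU".
The new code spells the same tests with operator== for readability and widens the accepted sets
("MVN" in the target gate, "TanH" in the activation list). A standalone illustration of the
equivalence (plain C++, not OpenCV code):

    #include <cassert>
    #include <string>

    int main()
    {
        std::string type = "ReLU";
        // compare() yields 0 on equality, so the negated form and operator== agree.
        assert(!type.compare("ReLU") == (type == "ReLU"));
        assert((type.compare("Power") != 0) == (type != "Power"));
        return 0;
    }
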
@@ -81,9 +81,6 @@ public:
             dstWeightsData[i] = w;
             dstBiasData[i] = (hasBias ? biasData[i] : 0.0f) - w * meanData[i] * varMeanScale;
         }
-
-        umat_weight = weights_.getUMat(ACCESS_READ);
-        umat_bias = bias_.getUMat(ACCESS_READ);
     }

     void getScaleShift(Mat& scale, Mat& shift) const

@@ -119,6 +116,12 @@ public:
         CV_Assert(blobs.size() >= 2);
         CV_Assert(inputs.size() == 1);

+        if (umat_weight.empty())
+        {
+            umat_weight = weights_.getUMat(ACCESS_READ);
+            umat_bias = bias_.getUMat(ACCESS_READ);
+        }
+
         UMat &inpBlob = inputs[0];
         CV_Assert(inpBlob.dims == 2 || inpBlob.dims == 4);
         int groups = inpBlob.size[0];

@@ -246,6 +246,11 @@ public:
             power = activ_power->power;
             activType = OCL4DNN_CONV_FUSED_ACTIV_POWER;
         }
+        Ptr<TanHLayer> activ_tanh = activ.dynamicCast<TanHLayer>();
+        if (!activ_tanh.empty())
+        {
+            activType = OCL4DNN_CONV_FUSED_ACTIV_TANH;
+        }
     }
 #endif
     return !activ.empty();

@@ -877,11 +882,16 @@ public:
             {
                 convolutionOp->setActivPower(true, power);
             }
+            else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_TANH)
+            {
+                convolutionOp->setActivTanh(true);
+            }
             else
             {
                 convolutionOp->setActivReLU(false, 0);
                 convolutionOp->setActivPReLU(false, reluslope);
                 convolutionOp->setActivPower(false, 1.f);
+                convolutionOp->setActivTanh(false);
             }
             newActiv = false;
         }

@@ -60,6 +60,36 @@ public:
         normVariance = params.get<bool>("normalize_variance", true);
         acrossChannels = params.get<bool>("across_channels", false);
         eps = params.get<double>("eps", 1e-9);
+        fuse_batch_norm = false;
+        fuse_relu = false;
+        relu_slope = 0.f;
+    }
+
+    Ptr<BatchNormLayer> bnorm;
+    Mat scale, shift;
+    UMat bnorm_weight, bnorm_bias;
+    bool fuse_batch_norm;
+
+    bool setBatchNorm(const Ptr<BatchNormLayer>& layer )
+    {
+        bnorm = layer;
+        fuse_batch_norm = !bnorm.empty() && (preferableTarget == DNN_TARGET_OPENCL);
+        return fuse_batch_norm;
+    }
+
+    Ptr<ReLULayer> activ_relu;
+    float relu_slope;
+    bool fuse_relu;
+    bool setActivation(const Ptr<ActivationLayer>& layer)
+    {
+        if (!layer.empty() && preferableTarget == DNN_TARGET_OPENCL)
+        {
+            activ_relu = layer.dynamicCast<ReLULayer>();
+            if( !activ_relu.empty() )
+                relu_slope = activ_relu->negativeSlope;
+        }
+        fuse_relu = !activ_relu.empty();
+        return fuse_relu;
     }

 #ifdef HAVE_OPENCL

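The setBatchNorm()/setActivation() hooks added to the MVN layer above are what the fusion pass
gated in the first hunk relies on: a following BatchNorm or ReLU is only folded in (and skipped at
run time) if the layer accepts it. The calling side is not part of this excerpt; the following is a
minimal sketch of the usual pattern, assuming the hooks are reachable through the base Layer
interface, with illustrative names (nextLayer, afterBNorm) that do not come from this diff:

    // Hypothetical sketch of the fusion pass; not taken from this diff.
    Ptr<BatchNormLayer> nextBNorm = nextLayer.dynamicCast<BatchNormLayer>();
    if (!nextBNorm.empty() && currLayer->setBatchNorm(nextBNorm))
    {
        // BatchNorm is folded into the MVN layer; mark the BatchNorm node as skipped.
    }

    Ptr<ActivationLayer> nextActiv = afterBNorm.dynamicCast<ActivationLayer>();
    if (!nextActiv.empty() && currLayer->setActivation(nextActiv))
    {
        // The ReLU (with its optional negative slope) is folded in as well.
    }
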
@@ -71,19 +101,24 @@ public:
         inputs_.getUMatVector(inputs);
         outputs_.getUMatVector(outputs);

+        if( fuse_batch_norm && scale.empty())
+        {
+            bnorm->getScaleShift(scale, shift);
+            bnorm_weight = scale.getUMat(ACCESS_READ);
+            bnorm_bias = shift.getUMat(ACCESS_READ);
+        }
+
         for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
         {
-            UMat &inpBlob = inputs[inpIdx];
-            UMat &outBlob = outputs[inpIdx];
+            UMat &inpMat = inputs[inpIdx];
+            UMat &outMat = outputs[inpIdx];

             int splitDim = (acrossChannels) ? 1 : 2;
             int i, newRows = 1;
             for( i = 0; i < splitDim; i++ )
-                newRows *= inpBlob.size[i];
+                newRows *= inpMat.size[i];

-            MatShape s = shape(newRows, inpBlob.total() / newRows);
-            UMat& inpMat = inpBlob;
-            UMat& outMat = outBlob;
+            MatShape s = shape(newRows, inpMat.total() / newRows);
             UMat oneMat = UMat::ones(s[1], 1, CV_32F);
             UMat meanMat = UMat(s[0], 1, CV_32F);
             UMat devMat = UMat(s[0], 1, CV_32F);

@@ -121,8 +156,9 @@ public:
             }

             String kname = format("mvn%d", number);
-            if (normVariance)
-                buildopt += "-DNORM_VARIANCE";
+            buildopt += format("%s %s %s ", (normVariance) ? "-DNORM_VARIANCE" : "",
+                               (fuse_batch_norm) ? "-DFUSE_BATCH_NORM" : "",
+                               (fuse_relu) ? "-DFUSE_RELU" : "");
             ocl::Kernel kernel1(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt);
             if (kernel1.empty())
                 return false;

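The kernel build options are now assembled in one format() call, with each fusion feature mapped to
a -D define consumed by the MVN kernel (see the last two hunks of this diff). As a worked example,
with normVariance = true, fuse_batch_norm = false and fuse_relu = true, buildopt is extended by the
string below (note the doubled space where the empty placeholder is substituted):

    "-DNORM_VARIANCE  -DFUSE_RELU "
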
@@ -132,7 +168,11 @@ public:
             kernel1.set(3, (float)eps);
             kernel1.set(4, ocl::KernelArg::PtrReadOnly(meanMat));
             kernel1.set(5, ocl::KernelArg::PtrReadOnly(devMat));
-            kernel1.set(6, ocl::KernelArg::PtrWriteOnly(outMat));
+            kernel1.set(6, ocl::KernelArg::PtrReadOnly(bnorm_weight));
+            kernel1.set(7, ocl::KernelArg::PtrReadOnly(bnorm_bias));
+            kernel1.set(8, (int)inpMat.size[1]);
+            kernel1.set(9, (float)relu_slope);
+            kernel1.set(10, ocl::KernelArg::PtrWriteOnly(outMat));
             ret = kernel1.run(2, global, NULL, false);
             if (!ret)
                 return false;

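The new host-side arguments line up with the extended MVN kernel signature shown later in this
diff; a summary of the mapping (argument indices 0-2 are outside this excerpt):

    // kernel1.set(3)  -> const Dtype eps
    // kernel1.set(4)  -> __global const Dtype* mean
    // kernel1.set(5)  -> __global const Dtype* dev
    // kernel1.set(6)  -> __global const Dtype* bnorm_weight   (new)
    // kernel1.set(7)  -> __global const Dtype* bnorm_bias     (new)
    // kernel1.set(8)  -> const int channels                   (new)
    // kernel1.set(9)  -> const float relu_slope               (new)
    // kernel1.set(10) -> __global Dtype* dst                  (was index 6)
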
@@ -77,7 +77,8 @@ typedef enum {
     OCL4DNN_CONV_FUSED_ACTIV_NONE = 0,
     OCL4DNN_CONV_FUSED_ACTIV_RELU = 1,
     OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2,
-    OCL4DNN_CONV_FUSED_ACTIV_POWER = 3
+    OCL4DNN_CONV_FUSED_ACTIV_POWER = 3,
+    OCL4DNN_CONV_FUSED_ACTIV_TANH = 4
 } ocl4dnnFusedActiv_t;

 template<typename Dtype>

@@ -94,6 +95,7 @@ class OCL4DNNConvSpatial
     void setActivReLU(bool fuse_activ, float slope);
     void setActivPReLU(bool fuse_activ, std::vector<float> &slope);
     void setActivPower(bool fuse_activ, float power);
+    void setActivTanh(bool fuse_activ);
     void setBias(bool bias_term);

 private:

@@ -159,6 +159,9 @@ void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ,
     case OCL4DNN_CONV_FUSED_ACTIV_POWER:
         addDef("FUSED_CONV_POWER", 1);
         break;
+    case OCL4DNN_CONV_FUSED_ACTIV_TANH:
+        addDef("FUSED_CONV_TANH", 1);
+        break;
     default:
         ;
     }

@@ -415,6 +418,17 @@ void OCL4DNNConvSpatial<Dtype>::setActivPower(bool fuse_activ, float power)
         fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
 }

+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setActivTanh(bool fuse_activ)
+{
+    if ( fuse_activ )
+    {
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_TANH;
+    }
+    else
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+}
+
 template<typename Dtype>
 bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
                                         const UMat& bottom2,

@@ -55,6 +55,9 @@
 #elif defined(FUSED_CONV_POWER)
 #define ACTIVATION_RELU_FUNCTION(x, c) pow(x, power)
 #define NEGATIVE_SLOPE_ARG Dtype power,
+#elif defined(FUSED_CONV_TANH)
+#define ACTIVATION_RELU_FUNCTION(x, c) tanh(x)
+#define NEGATIVE_SLOPE_ARG
 #else
 #define ACTIVATION_RELU_FUNCTION(x, c) (x)
 #define NEGATIVE_SLOPE_ARG

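The convolution kernels apply whatever activation was fused through the ACTIVATION_RELU_FUNCTION
macro, so supporting TanH only takes the new #elif branch; TanH needs no extra kernel parameter,
hence the empty NEGATIVE_SLOPE_ARG. A hedged illustration of the preprocessing (out and sum are
placeholder names, not identifiers from this diff):

    out = ACTIVATION_RELU_FUNCTION(sum, c);   // as written in the kernel source
    out = tanh(sum);                          // after preprocessing with -DFUSED_CONV_TANH
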
@@ -89,6 +89,10 @@ __kernel void MVN(__global const Dtype* src,
                   const Dtype eps,
                   __global const Dtype* mean,
                   __global const Dtype* dev,
+                  __global const Dtype* bnorm_weight,
+                  __global const Dtype* bnorm_bias,
+                  const int channels,
+                  const float relu_slope,
                   __global Dtype* dst)
 {
     int x = get_global_id(0);

@@ -106,7 +110,21 @@ __kernel void MVN(__global const Dtype* src,
 #else
     alpha = 1;
 #endif
+
+    Dtype w = 1.f, b = 0.f;
+#ifdef FUSE_BATCH_NORM
+    w = bnorm_weight[x % channels];
+    b = bnorm_bias[x % channels];
+#endif
+
     vec_type src_vec = load(src, index) - (vec_type)mean_val;
     vec_type dst_vec = src_vec * alpha;
+    dst_vec = dst_vec * w + (vec_type)b;
+
+#ifdef FUSE_RELU
+    vec_type new_val = dst_vec * relu_slope;
+    dst_vec = select(new_val, dst_vec, dst_vec > (vec_type)0.f);
+#endif
+
     store(dst_vec, dst, index);
 }

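Taken together, each element now goes through mean/variance normalization, an optional folded
BatchNorm scale and shift, and an optional (leaky) ReLU in a single kernel pass; without
FUSE_BATCH_NORM the defaults w = 1, b = 0 leave the value untouched, and without FUSE_RELU the
select() is compiled out, so the non-fused path behaves as before. A minimal scalar sketch of the
fused math, assuming alpha is the usual MVN factor (1/sqrt(variance + eps) when NORM_VARIANCE is
defined, 1 otherwise):

    // Scalar reference of the fused MVN + BatchNorm + ReLU element-wise path (illustrative only).
    float fused_mvn(float x, float mean, float alpha,
                    float w, float b, float relu_slope)
    {
        float v = (x - mean) * alpha;         // mean (and optional variance) normalization
        v = v * w + b;                        // folded BatchNorm: w = scale, b = shift
        return v > 0.f ? v : v * relu_slope;  // fused ReLU, leaky when relu_slope != 0
    }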