mirror of
https://github.com/opencv/opencv.git
synced 2024-11-25 19:50:38 +08:00
MVN layer ocl implementation
Signed-off-by: Li Peng <peng.li@intel.com>
This commit is contained in:
parent
7bc017601f
commit
e77af4ae33
@ -43,6 +43,8 @@
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
#include "math_functions.hpp"
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -60,11 +62,93 @@ public:
|
||||
eps = params.get<double>("eps", 1e-9);
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inputs_.getUMatVector(inputs);
|
||||
outputs_.getUMatVector(outputs);
|
||||
|
||||
for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
|
||||
{
|
||||
UMat &inpBlob = inputs[inpIdx];
|
||||
UMat &outBlob = outputs[inpIdx];
|
||||
|
||||
int splitDim = (acrossChannels) ? 1 : 2;
|
||||
int i, newRows = 1;
|
||||
for( i = 0; i < splitDim; i++ )
|
||||
newRows *= inpBlob.size[i];
|
||||
|
||||
MatShape s = shape(newRows, inpBlob.total() / newRows);
|
||||
UMat& inpMat = inpBlob;
|
||||
UMat& outMat = outBlob;
|
||||
UMat oneMat = UMat::ones(s[1], 1, CV_32F);
|
||||
UMat meanMat = UMat(s[0], 1, CV_32F);
|
||||
UMat devMat = UMat(s[0], 1, CV_32F);
|
||||
UMat tmpMat = UMat(s[0], s[1], CV_32F);
|
||||
float alpha = 1.0f / s[1];
|
||||
|
||||
bool ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, s[0], s[1], alpha,
|
||||
inpMat, 0, oneMat, 0, 0.0f, meanMat, 0);
|
||||
if (!ret)
|
||||
return false;
|
||||
|
||||
int number = (s[1] % 8 == 0) ? 8 : ((s[1] % 4 == 0) ? 4 : 1);
|
||||
String buildopt = format("-DNUM=%d ", number);
|
||||
String kname = format("calc_mean%d", number);
|
||||
ocl::Kernel kernel(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt);
|
||||
if (kernel.empty())
|
||||
return false;
|
||||
size_t global[] = { (size_t)s[0], (size_t)(s[1] / number) };
|
||||
kernel.set(0, ocl::KernelArg::PtrReadOnly(inpMat));
|
||||
kernel.set(1, (int)s[0]);
|
||||
kernel.set(2, (int)s[1]);
|
||||
kernel.set(3, ocl::KernelArg::PtrReadOnly(meanMat));
|
||||
kernel.set(4, ocl::KernelArg::PtrWriteOnly(tmpMat));
|
||||
ret = kernel.run(2, global, NULL, false);
|
||||
if (!ret)
|
||||
return false;
|
||||
|
||||
if (normVariance)
|
||||
{
|
||||
ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, s[0], s[1], alpha,
|
||||
tmpMat, 0, oneMat, 0, 0.0f, devMat, 0);
|
||||
if (!ret)
|
||||
return false;
|
||||
}
|
||||
|
||||
kname = format("mvn%d", number);
|
||||
if (normVariance)
|
||||
buildopt += "-DNORM_VARIANCE";
|
||||
ocl::Kernel kernel1(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt);
|
||||
if (kernel1.empty())
|
||||
return false;
|
||||
kernel1.set(0, ocl::KernelArg::PtrReadOnly(inpMat));
|
||||
kernel1.set(1, (int)s[0]);
|
||||
kernel1.set(2, (int)s[1]);
|
||||
kernel1.set(3, (float)eps);
|
||||
kernel1.set(4, ocl::KernelArg::PtrReadOnly(meanMat));
|
||||
kernel1.set(5, ocl::KernelArg::PtrReadOnly(devMat));
|
||||
kernel1.set(6, ocl::KernelArg::PtrWriteOnly(outMat));
|
||||
ret = kernel1.run(2, global, NULL, false);
|
||||
if (!ret)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
}
|
||||
|
||||
|
112
modules/dnn/src/opencl/mvn.cl
Normal file
112
modules/dnn/src/opencl/mvn.cl
Normal file
@ -0,0 +1,112 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#define Dtype float
|
||||
#define Dtype4 float4
|
||||
#define Dtype8 float8
|
||||
|
||||
#if NUM == 8
|
||||
#define load(src, index) vload8(0, src + index)
|
||||
#define store(vec, dst, index) vstore8(vec, 0, dst + index)
|
||||
#define vec_type Dtype8
|
||||
#define CALC_MEAN calc_mean8
|
||||
#define MVN mvn8
|
||||
#elif NUM == 4
|
||||
#define load(src, index) vload4(0, src + index)
|
||||
#define store(vec, dst, index) vstore4(vec, 0, dst + index)
|
||||
#define vec_type Dtype4
|
||||
#define CALC_MEAN calc_mean4
|
||||
#define MVN mvn4
|
||||
#elif NUM == 1
|
||||
#define load(src, index) src[index]
|
||||
#define store(vec, dst, index) dst[index] = vec
|
||||
#define vec_type Dtype
|
||||
#define CALC_MEAN calc_mean1
|
||||
#define MVN mvn1
|
||||
#endif
|
||||
|
||||
__kernel void CALC_MEAN(__global const Dtype* src,
|
||||
const int rows,
|
||||
const int cols,
|
||||
__global Dtype* mean,
|
||||
__global Dtype* dst)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1) * NUM;
|
||||
int index = x * cols + y;
|
||||
|
||||
if (x >= rows || y >= cols)
|
||||
return;
|
||||
|
||||
Dtype mean_val = mean[x];
|
||||
vec_type src_vec = load(src, index);
|
||||
vec_type dst_vec = pow(src_vec - (vec_type)mean_val, 2);
|
||||
store(dst_vec, dst, index);
|
||||
}
|
||||
|
||||
__kernel void MVN(__global const Dtype* src,
|
||||
const int rows,
|
||||
const int cols,
|
||||
const Dtype eps,
|
||||
__global const Dtype* mean,
|
||||
__global const Dtype* dev,
|
||||
__global Dtype* dst)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1) * NUM;
|
||||
int index = x * cols + y;
|
||||
|
||||
if (x >= rows || y >= cols)
|
||||
return;
|
||||
|
||||
Dtype mean_val = mean[x];
|
||||
Dtype dev_val = sqrt(dev[x]);
|
||||
Dtype alpha;
|
||||
#ifdef NORM_VARIANCE
|
||||
alpha = 1 / (eps + dev_val);
|
||||
#else
|
||||
alpha = 1;
|
||||
#endif
|
||||
vec_type src_vec = load(src, index) - (vec_type)mean_val;
|
||||
vec_type dst_vec = src_vec * alpha;
|
||||
store(dst_vec, dst, index);
|
||||
}
|
@ -202,6 +202,11 @@ TEST(Layer_Test_MVN, Accuracy)
|
||||
testLayerUsingCaffeModels("layer_mvn");
|
||||
}
|
||||
|
||||
OCL_TEST(Layer_Test_MVN, Accuracy)
|
||||
{
|
||||
testLayerUsingCaffeModels("layer_mvn", DNN_TARGET_OPENCL);
|
||||
}
|
||||
|
||||
void testReshape(const MatShape& inputShape, const MatShape& targetShape,
|
||||
int axis = 0, int num_axes = -1,
|
||||
MatShape mask = MatShape())
|
||||
|
Loading…
Reference in New Issue
Block a user