Merge pull request #10625 from pengli:dnn

This commit is contained in:
Alexander Alekhin 2018-01-18 18:26:30 +00:00
commit 8226bd25c4
5 changed files with 187 additions and 56 deletions

View File

@ -46,6 +46,7 @@
#include "opencv2/core/hal/hal.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include <iostream>
#include "opencl_kernels_dnn.hpp"
#ifdef HAVE_OPENCL
using namespace cv::dnn::ocl4dnn;
@ -1051,6 +1052,8 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
{
public:
Mat weightsMat, biasesMat;
UMat umat_weights;
UMat umat_biases;
MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const
{
@ -1341,11 +1344,107 @@ public:
}
};
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
std::vector<UMat> internals;
inputs_.getUMatVector(inputs);
outputs_.getUMatVector(outputs);
internals_.getUMatVector(internals);
int outCn = numOutput;
int inpCn = inputs[0].size[1];
if (is1x1())
return false;
if (umat_weights.empty())
{
transpose(blobs[0].reshape(1, inpCn), umat_weights);
umat_biases = hasBias() ? blobs[1].reshape(1, outCn).getUMat(ACCESS_READ) :
UMat::zeros(outCn, 1, CV_32F);
}
String buildopt = format("-DT=%s ", ocl::typeToStr(inputs[0].type()));
buildopt += format("-DPAD_H=%d -DPAD_W=%d -DKERNEL_H=%d -DKERNEL_W=%d -DSTRIDE_H=%d -DSTRIDE_W=%d ",
pad.height, pad.width, kernel.height, kernel.width, stride.height, stride.width);
for (size_t ii = 0; ii < outputs.size(); ii++)
{
int ngroups = outCn / blobs[0].size[1];
int inpGroupCn = inpCn / ngroups;
int outGroupCn = blobs[0].size[1];
const UMat& inp = inputs[ii];
UMat& out = outputs[ii];
int numImg = inp.size[0];
int inpH = inp.size[2], inpW = inp.size[3];
int outH = out.size[2], outW = out.size[3];
MatShape inpshape = shape(numImg*inpCn, inpH*inpW);
MatShape outshape = shape(numImg*outCn, outH*outW);
UMat convBlob = inputs[ii].reshape(1, inpshape.size(), &inpshape[0]);
UMat decnBlob = out.reshape(1, outshape.size(), &outshape[0]);
int rows = internals[0].rows / ngroups;
for (int n = 0; n < numImg; n++)
{
for (int g = 0; g < ngroups; g++)
{
UMat colMat = internals[0].rowRange(_Range(g * rows, rows));
UMat convMat = convBlob.rowRange(_Range((g + n * ngroups) * inpGroupCn, inpGroupCn));
UMat wghtMat = umat_weights.colRange(_Range(g * inpGroupCn, inpGroupCn));
gemm(wghtMat, convMat, 1, noArray(), 0, colMat, 0);
}
for (int g = 0; g < ngroups; g++)
{
int total = outGroupCn * decnBlob.cols;
int index = 0;
int height_col = (outH + 2 * pad.height - kernel.height) / stride.height + 1;
int width_col = (outW + 2 * pad.width - kernel.width) / stride.width + 1;
int coeff_h = (1 - stride.height * kernel.width * height_col) * width_col;
int coeff_w = (1 - stride.width * height_col * width_col);
ocl::Kernel k("col2im", ocl::dnn::col2im_oclsrc, buildopt);
k.set(index++, total);
k.set(index++, ocl::KernelArg::PtrReadOnly(internals[0]));
k.set(index++, (int)(g * rows * internals[0].cols));
k.set(index++, outGroupCn);
k.set(index++, outH);
k.set(index++, outW);
k.set(index++, height_col);
k.set(index++, width_col);
k.set(index++, coeff_h);
k.set(index++, coeff_w);
k.set(index++, ocl::KernelArg::PtrReadOnly(umat_biases));
k.set(index++, (int)(g * outGroupCn * umat_biases.cols));
k.set(index++, ocl::KernelArg::PtrWriteOnly(decnBlob));
k.set(index++, (int)((g + n * ngroups) * outGroupCn * decnBlob.cols));
size_t global[] = { (size_t)total };
bool ret = k.run(1, global, NULL, false);
if (!ret)
return false;
}
}
}
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}

View File

@ -1,62 +1,79 @@
/*************************************************************************************
* Copyright (c) 2015, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
**************************************************************************************/
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
__kernel void col2im(const int n, __global const T* data_col, const int col_offset,
const int height, const int width, const int channels,
const int patch_h, const int patch_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int height_col, const int width_col,
__global T* data_im, const int img_offset)
__kernel void col2im(const int n, __global const T* data_col,
const int data_col_offset,
const int channels,
const int height, const int width,
const int height_col, const int width_col,
const int coeff_h, const int coeff_w,
__global const T* biasvec,
const int bias_offset,
__global T* data_im,
const int data_im_offset)
{
data_col = data_col + col_offset;
data_im = data_im + img_offset;
int index = get_global_id(0);
if(index < n) {
T val = 0;
int w = index % width + pad_w;
int h = (index / width) % height + pad_h;
int c = index / (width * height);
data_col = data_col + data_col_offset;
biasvec = biasvec + bias_offset;
data_im = data_im + data_im_offset;
int index = get_global_id(0);
// compute the start and end of the output
int w_col_start = (w < patch_w) ? 0 : (w - patch_w) / stride_w + 1;
int w_col_end = min(w / stride_w + 1, width_col);
int h_col_start = (h < patch_h) ? 0 : (h - patch_h) / stride_h + 1;
int h_col_end = min(h / stride_h + 1, height_col);
if(index < n)
{
T val = 0.f;
int w = index % width + PAD_W;
int h = (index / width) % height + PAD_H;
int c = index / (width * height);
int h_col_start = (h < KERNEL_H) ? 0 : (h - KERNEL_H) / STRIDE_H + 1;
int h_col_end = min(h / STRIDE_H + 1, height_col);
int plane_size_col = height_col * width_col;
int offset = (c * KERNEL_H * KERNEL_W + h * KERNEL_W + w) * plane_size_col;
// equivalent implementation
int offset =
(c * patch_h * patch_w + h * patch_w + w) * height_col * width_col;
int coeff_h_col = (1 - stride_h * patch_w * height_col) * width_col;
int coeff_w_col = (1 - stride_w * height_col * width_col);
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col];
}
int w_col_start = (w < KERNEL_W) ? 0 : (w - KERNEL_W) / STRIDE_W + 1;
int w_col_end = min(w / STRIDE_W + 1, width_col);
for (int h_col = h_col_start; h_col < h_col_end; ++h_col)
for (int w_col = w_col_start; w_col < w_col_end; ++w_col)
val += data_col[offset + h_col * coeff_h + w_col * coeff_w];
data_im[index] = val + biasvec[c];
}
data_im[index] = val;
}
}

View File

@ -167,6 +167,11 @@ TEST(Layer_Test_DeConvolution, Accuracy)
testLayerUsingCaffeModels("layer_deconvolution", DNN_TARGET_CPU, true, false);
}
OCL_TEST(Layer_Test_DeConvolution, Accuracy)
{
testLayerUsingCaffeModels("layer_deconvolution", DNN_TARGET_OPENCL, true, false);
}
TEST(Layer_Test_InnerProduct, Accuracy)
{
testLayerUsingCaffeModels("layer_inner_product", DNN_TARGET_CPU, true);

View File

@ -171,6 +171,11 @@ TEST(Test_TensorFlow, deconvolution)
runTensorFlowNet("deconvolution");
}
OCL_TEST(Test_TensorFlow, deconvolution)
{
runTensorFlowNet("deconvolution", DNN_TARGET_OPENCL);
}
TEST(Test_TensorFlow, matmul)
{
runTensorFlowNet("matmul");

View File

@ -165,6 +165,11 @@ TEST(Torch_Importer, run_deconv)
runTorchNet("net_deconv");
}
OCL_TEST(Torch_Importer, run_deconv)
{
runTorchNet("net_deconv", DNN_TARGET_OPENCL);
}
TEST(Torch_Importer, run_batch_norm)
{
runTorchNet("net_batch_norm", DNN_TARGET_CPU, "", false, true);