mirror of
https://github.com/opencv/opencv.git
synced 2024-11-24 19:20:28 +08:00
ocl support for Deconvolution layer
Signed-off-by: Li Peng <peng.li@intel.com>
This commit is contained in:
parent
ec353dbdda
commit
2124361ff7
@ -46,6 +46,7 @@
|
||||
#include "opencv2/core/hal/hal.hpp"
|
||||
#include "opencv2/core/hal/intrin.hpp"
|
||||
#include <iostream>
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
using namespace cv::dnn::ocl4dnn;
|
||||
@ -1051,6 +1052,8 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
|
||||
{
|
||||
public:
|
||||
Mat weightsMat, biasesMat;
|
||||
UMat umat_weights;
|
||||
UMat umat_biases;
|
||||
|
||||
MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const
|
||||
{
|
||||
@ -1341,11 +1344,107 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
std::vector<UMat> internals;
|
||||
|
||||
inputs_.getUMatVector(inputs);
|
||||
outputs_.getUMatVector(outputs);
|
||||
internals_.getUMatVector(internals);
|
||||
|
||||
int outCn = numOutput;
|
||||
int inpCn = inputs[0].size[1];
|
||||
|
||||
if (is1x1())
|
||||
return false;
|
||||
|
||||
if (umat_weights.empty())
|
||||
{
|
||||
transpose(blobs[0].reshape(1, inpCn), umat_weights);
|
||||
umat_biases = hasBias() ? blobs[1].reshape(1, outCn).getUMat(ACCESS_READ) :
|
||||
UMat::zeros(outCn, 1, CV_32F);
|
||||
}
|
||||
|
||||
String buildopt = format("-DT=%s ", ocl::typeToStr(inputs[0].type()));
|
||||
buildopt += format("-DPAD_H=%d -DPAD_W=%d -DKERNEL_H=%d -DKERNEL_W=%d -DSTRIDE_H=%d -DSTRIDE_W=%d ",
|
||||
pad.height, pad.width, kernel.height, kernel.width, stride.height, stride.width);
|
||||
|
||||
for (size_t ii = 0; ii < outputs.size(); ii++)
|
||||
{
|
||||
int ngroups = outCn / blobs[0].size[1];
|
||||
int inpGroupCn = inpCn / ngroups;
|
||||
int outGroupCn = blobs[0].size[1];
|
||||
const UMat& inp = inputs[ii];
|
||||
UMat& out = outputs[ii];
|
||||
int numImg = inp.size[0];
|
||||
int inpH = inp.size[2], inpW = inp.size[3];
|
||||
int outH = out.size[2], outW = out.size[3];
|
||||
|
||||
MatShape inpshape = shape(numImg*inpCn, inpH*inpW);
|
||||
MatShape outshape = shape(numImg*outCn, outH*outW);
|
||||
UMat convBlob = inputs[ii].reshape(1, inpshape.size(), &inpshape[0]);
|
||||
UMat decnBlob = out.reshape(1, outshape.size(), &outshape[0]);
|
||||
int rows = internals[0].rows / ngroups;
|
||||
|
||||
for (int n = 0; n < numImg; n++)
|
||||
{
|
||||
for (int g = 0; g < ngroups; g++)
|
||||
{
|
||||
UMat colMat = internals[0].rowRange(_Range(g * rows, rows));
|
||||
UMat convMat = convBlob.rowRange(_Range((g + n * ngroups) * inpGroupCn, inpGroupCn));
|
||||
UMat wghtMat = umat_weights.colRange(_Range(g * inpGroupCn, inpGroupCn));
|
||||
gemm(wghtMat, convMat, 1, noArray(), 0, colMat, 0);
|
||||
}
|
||||
|
||||
for (int g = 0; g < ngroups; g++)
|
||||
{
|
||||
int total = outGroupCn * decnBlob.cols;
|
||||
int index = 0;
|
||||
int height_col = (outH + 2 * pad.height - kernel.height) / stride.height + 1;
|
||||
int width_col = (outW + 2 * pad.width - kernel.width) / stride.width + 1;
|
||||
int coeff_h = (1 - stride.height * kernel.width * height_col) * width_col;
|
||||
int coeff_w = (1 - stride.width * height_col * width_col);
|
||||
|
||||
ocl::Kernel k("col2im", ocl::dnn::col2im_oclsrc, buildopt);
|
||||
k.set(index++, total);
|
||||
k.set(index++, ocl::KernelArg::PtrReadOnly(internals[0]));
|
||||
k.set(index++, (int)(g * rows * internals[0].cols));
|
||||
k.set(index++, outGroupCn);
|
||||
k.set(index++, outH);
|
||||
k.set(index++, outW);
|
||||
k.set(index++, height_col);
|
||||
k.set(index++, width_col);
|
||||
k.set(index++, coeff_h);
|
||||
k.set(index++, coeff_w);
|
||||
k.set(index++, ocl::KernelArg::PtrReadOnly(umat_biases));
|
||||
k.set(index++, (int)(g * outGroupCn * umat_biases.cols));
|
||||
k.set(index++, ocl::KernelArg::PtrWriteOnly(decnBlob));
|
||||
k.set(index++, (int)((g + n * ngroups) * outGroupCn * decnBlob.cols));
|
||||
|
||||
size_t global[] = { (size_t)total };
|
||||
bool ret = k.run(1, global, NULL, false);
|
||||
if (!ret)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
}
|
||||
|
||||
|
@ -1,62 +1,79 @@
|
||||
/*************************************************************************************
|
||||
* Copyright (c) 2015, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
|
||||
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************************/
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
__kernel void col2im(const int n, __global const T* data_col, const int col_offset,
|
||||
const int height, const int width, const int channels,
|
||||
const int patch_h, const int patch_w,
|
||||
const int pad_h, const int pad_w,
|
||||
const int stride_h, const int stride_w,
|
||||
const int height_col, const int width_col,
|
||||
__global T* data_im, const int img_offset)
|
||||
__kernel void col2im(const int n, __global const T* data_col,
|
||||
const int data_col_offset,
|
||||
const int channels,
|
||||
const int height, const int width,
|
||||
const int height_col, const int width_col,
|
||||
const int coeff_h, const int coeff_w,
|
||||
__global const T* biasvec,
|
||||
const int bias_offset,
|
||||
__global T* data_im,
|
||||
const int data_im_offset)
|
||||
{
|
||||
data_col = data_col + col_offset;
|
||||
data_im = data_im + img_offset;
|
||||
int index = get_global_id(0);
|
||||
if(index < n) {
|
||||
T val = 0;
|
||||
int w = index % width + pad_w;
|
||||
int h = (index / width) % height + pad_h;
|
||||
int c = index / (width * height);
|
||||
data_col = data_col + data_col_offset;
|
||||
biasvec = biasvec + bias_offset;
|
||||
data_im = data_im + data_im_offset;
|
||||
int index = get_global_id(0);
|
||||
|
||||
// compute the start and end of the output
|
||||
int w_col_start = (w < patch_w) ? 0 : (w - patch_w) / stride_w + 1;
|
||||
int w_col_end = min(w / stride_w + 1, width_col);
|
||||
int h_col_start = (h < patch_h) ? 0 : (h - patch_h) / stride_h + 1;
|
||||
int h_col_end = min(h / stride_h + 1, height_col);
|
||||
if(index < n)
|
||||
{
|
||||
T val = 0.f;
|
||||
int w = index % width + PAD_W;
|
||||
int h = (index / width) % height + PAD_H;
|
||||
int c = index / (width * height);
|
||||
int h_col_start = (h < KERNEL_H) ? 0 : (h - KERNEL_H) / STRIDE_H + 1;
|
||||
int h_col_end = min(h / STRIDE_H + 1, height_col);
|
||||
int plane_size_col = height_col * width_col;
|
||||
int offset = (c * KERNEL_H * KERNEL_W + h * KERNEL_W + w) * plane_size_col;
|
||||
|
||||
// equivalent implementation
|
||||
int offset =
|
||||
(c * patch_h * patch_w + h * patch_w + w) * height_col * width_col;
|
||||
int coeff_h_col = (1 - stride_h * patch_w * height_col) * width_col;
|
||||
int coeff_w_col = (1 - stride_w * height_col * width_col);
|
||||
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
|
||||
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
|
||||
val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col];
|
||||
}
|
||||
int w_col_start = (w < KERNEL_W) ? 0 : (w - KERNEL_W) / STRIDE_W + 1;
|
||||
int w_col_end = min(w / STRIDE_W + 1, width_col);
|
||||
|
||||
for (int h_col = h_col_start; h_col < h_col_end; ++h_col)
|
||||
for (int w_col = w_col_start; w_col < w_col_end; ++w_col)
|
||||
val += data_col[offset + h_col * coeff_h + w_col * coeff_w];
|
||||
|
||||
data_im[index] = val + biasvec[c];
|
||||
}
|
||||
data_im[index] = val;
|
||||
}
|
||||
}
|
||||
|
@ -167,6 +167,11 @@ TEST(Layer_Test_DeConvolution, Accuracy)
|
||||
testLayerUsingCaffeModels("layer_deconvolution", DNN_TARGET_CPU, true, false);
|
||||
}
|
||||
|
||||
OCL_TEST(Layer_Test_DeConvolution, Accuracy)
|
||||
{
|
||||
testLayerUsingCaffeModels("layer_deconvolution", DNN_TARGET_OPENCL, true, false);
|
||||
}
|
||||
|
||||
TEST(Layer_Test_InnerProduct, Accuracy)
|
||||
{
|
||||
testLayerUsingCaffeModels("layer_inner_product", DNN_TARGET_CPU, true);
|
||||
|
@ -171,6 +171,11 @@ TEST(Test_TensorFlow, deconvolution)
|
||||
runTensorFlowNet("deconvolution");
|
||||
}
|
||||
|
||||
OCL_TEST(Test_TensorFlow, deconvolution)
|
||||
{
|
||||
runTensorFlowNet("deconvolution", DNN_TARGET_OPENCL);
|
||||
}
|
||||
|
||||
TEST(Test_TensorFlow, matmul)
|
||||
{
|
||||
runTensorFlowNet("matmul");
|
||||
|
@ -165,6 +165,11 @@ TEST(Torch_Importer, run_deconv)
|
||||
runTorchNet("net_deconv");
|
||||
}
|
||||
|
||||
OCL_TEST(Torch_Importer, run_deconv)
|
||||
{
|
||||
runTorchNet("net_deconv", DNN_TARGET_OPENCL);
|
||||
}
|
||||
|
||||
TEST(Torch_Importer, run_batch_norm)
|
||||
{
|
||||
runTorchNet("net_batch_norm", DNN_TARGET_CPU, "", false, true);
|
||||
|
Loading…
Reference in New Issue
Block a user